Python: Fix AG-UI reasoning role and multimodal media parsing to follow specification (#5389)

* Fix AG-UI reasoning role and multimodal media value field parsing

Fix two spec compliance issues in the AG-UI integration:

1. ReasoningMessageStartEvent now uses role='reasoning' instead of
   role='assistant', matching the AG-UI specification for reasoning
   messages.

2. _parse_multimodal_media_part now reads the 'value' field from source
   dicts (with fallback to 'data' for backward compatibility), matching
   the current AG-UI InputContentSource specification.

Bump ag-ui-protocol dependency from ==0.1.13 to >=0.1.16,<0.2 to pick
up the SDK fix that accepts role='reasoning' in ReasoningMessageStartEvent.

Fix pre-existing pyright reportMissingImports errors for orjson in sample
files, and fix import ordering in foundry-hosted-agents sample.

Fixes #5340

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Python: Fix AG-UI reasoning role and multimodal media parsing to follow specification

Fixes #5340

* Remove unintended .maf-runtime-ready marker file

Address PR review feedback: the .maf-runtime-ready file is not referenced anywhere in the repo and was left over from automation.

Fixes #5340

* Python: Fix duplicate AG-UI multimodal 'value' parsing in snapshot path

The snapshot normalization path used a second copy of the multimodal source
parsing logic that still read the deprecated 'data' field. When clients sent
base64 media with source={"type": "base64", "value": ...}, the snapshot event
emitted by the server dropped the payload, causing AG-UI-compatible clients
to crash on ingest.

Extract the shared source-field extraction into _extract_multimodal_source_fields
so both _parse_multimodal_media_part and the snapshot _legacy_binary_part stay
in sync with the AG-UI spec. Add snapshot-path regression tests covering
value-only, value-preferred-over-data, and the legacy data-field fallback.

Addresses review feedback on #5389 from @Rickyneer.

---------

Co-authored-by: Copilot <copilot@github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Evan Mattson
2026-04-24 13:12:34 +09:00
committed by GitHub
Unverified
parent 0989e68d1c
commit 932ceddf95
6 changed files with 204 additions and 50 deletions
@@ -263,27 +263,21 @@ def _deduplicate_messages(messages: list[Message]) -> list[Message]:
return unique_messages
def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
"""Convert a multimodal media part into Agent Framework content."""
part_type = str(part.get("type", "")).lower()
source = part.get("source")
def _extract_multimodal_source_fields(
part: dict[str, Any],
) -> tuple[str | None, str | None, str | None, str | None]:
"""Extract ``(url, data, binary_id, mime_type)`` from an AG-UI multimodal part.
mime_type = cast(
str | None,
part.get("mimeType")
or part.get("mime_type")
or {
"image": "image/*",
"audio": "audio/*",
"video": "video/*",
"document": "application/octet-stream",
"binary": "application/octet-stream",
}.get(part_type, "application/octet-stream"),
)
Handles both the current AG-UI spec (``source.value`` for base64 payloads) and the
legacy ``source.data`` field for backward compatibility. Returned values are the
raw extracted strings (or ``None`` when absent); callers apply their own defaults.
"""
mime_type = cast(str | None, part.get("mimeType") or part.get("mime_type"))
url = cast(str | None, part.get("url") or part.get("uri"))
data = cast(str | None, part.get("data"))
binary_id = cast(str | None, part.get("id"))
source = part.get("source")
if isinstance(source, dict):
source_dict = cast(dict[str, Any], source)
source_type = str(source_dict.get("type", "")).lower()
@@ -294,14 +288,31 @@ def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
if source_type in {"url", "uri"}:
url = cast(str | None, source_dict.get("url") or source_dict.get("uri"))
elif source_type in {"base64", "data", "binary"}:
data = cast(str | None, source_dict.get("data"))
data = cast(str | None, source_dict.get("value") or source_dict.get("data"))
elif source_type in {"id", "file"}:
binary_id = cast(str | None, source_dict.get("id"))
else:
url = cast(str | None, source_dict.get("url") or source_dict.get("uri") or url)
data = cast(str | None, source_dict.get("data") or data)
data = cast(str | None, source_dict.get("value") or source_dict.get("data") or data)
binary_id = cast(str | None, source_dict.get("id") or binary_id)
return url, data, binary_id, mime_type
def _parse_multimodal_media_part(part: dict[str, Any]) -> Content | None:
"""Convert a multimodal media part into Agent Framework content."""
part_type = str(part.get("type", "")).lower()
url, data, binary_id, mime_type = _extract_multimodal_source_fields(part)
if not mime_type:
mime_type = {
"image": "image/*",
"audio": "audio/*",
"video": "video/*",
"document": "application/octet-stream",
"binary": "application/octet-stream",
}.get(part_type, "application/octet-stream")
if isinstance(url, str) and url:
return Content.from_uri(uri=url, media_type=mime_type)
@@ -389,30 +400,7 @@ def _normalize_snapshot_content(content: Any) -> Any:
def _legacy_binary_part(part: dict[str, Any]) -> dict[str, Any]:
"""Convert draft/legacy multimodal parts to AG-UI snapshot binary shape."""
normalized: dict[str, Any] = {"type": "binary"}
mime_type = cast(str | None, part.get("mimeType") or part.get("mime_type"))
url = cast(str | None, part.get("url") or part.get("uri"))
data = cast(str | None, part.get("data"))
binary_id = cast(str | None, part.get("id"))
source = part.get("source")
if isinstance(source, dict):
source_part = cast(dict[str, Any], source)
source_mime = source_part.get("mimeType") or source_part.get("mime_type")
if isinstance(source_mime, str) and source_mime:
mime_type = source_mime
source_type = str(source_part.get("type", "")).lower()
if source_type in {"url", "uri"}:
url = cast(str | None, source_part.get("url") or source_part.get("uri"))
elif source_type in {"base64", "data", "binary"}:
data = cast(str | None, source_part.get("data"))
elif source_type in {"id", "file"}:
binary_id = cast(str | None, source_part.get("id"))
else:
url = cast(str | None, source_part.get("url") or source_part.get("uri") or url)
data = cast(str | None, source_part.get("data") or data)
binary_id = cast(str | None, source_part.get("id") or binary_id)
url, data, binary_id, mime_type = _extract_multimodal_source_fields(part)
if isinstance(mime_type, str) and mime_type:
normalized["mimeType"] = mime_type
@@ -596,7 +596,7 @@ def _emit_text_reasoning(content: Content, flow: FlowState | None = None) -> lis
events.extend(_close_reasoning_block(flow))
# Open new reasoning block.
events.append(ReasoningStartEvent(message_id=message_id))
events.append(ReasoningMessageStartEvent(message_id=message_id, role="assistant"))
events.append(ReasoningMessageStartEvent(message_id=message_id, role="reasoning"))
flow.reasoning_message_id = message_id
if text:
@@ -613,7 +613,7 @@ def _emit_text_reasoning(content: Content, flow: FlowState | None = None) -> lis
else:
# No flow -- backward-compatible full sequence per call.
events.append(ReasoningStartEvent(message_id=message_id))
events.append(ReasoningMessageStartEvent(message_id=message_id, role="assistant"))
events.append(ReasoningMessageStartEvent(message_id=message_id, role="reasoning"))
if text:
events.append(ReasoningMessageContentEvent(message_id=message_id, delta=text))
+1 -1
View File
@@ -23,7 +23,7 @@ classifiers = [
]
dependencies = [
"agent-framework-core>=1.1.1,<2",
"ag-ui-protocol==0.1.13",
"ag-ui-protocol>=0.1.16,<0.2",
"fastapi>=0.115.0,<0.133.1",
"uvicorn[standard]>=0.30.0,<0.42.0"
]
@@ -536,6 +536,77 @@ def test_agui_snapshot_format_preserves_multimodal_content():
assert content_parts[1]["url"] == "https://example.com/image.png"
def test_agui_snapshot_format_reads_base64_value_field():
    """A base64 source carrying only the spec ``value`` key yields a populated binary part."""
    encoded = base64.b64encode(b"abc").decode("utf-8")
    image_part = {
        "type": "image",
        "source": {"type": "base64", "value": encoded, "mimeType": "image/png"},
    }
    user_message = {"role": "user", "content": [image_part]}

    snapshot = agui_messages_to_snapshot_format([user_message])

    # The image part is expected to come back in the snapshot "binary" shape,
    # with the payload surfaced under "data".
    first_part = snapshot[0]["content"][0]
    assert first_part["type"] == "binary"
    assert first_part["mimeType"] == "image/png"
    assert first_part["data"] == encoded
def test_agui_snapshot_format_base64_value_preferred_over_data():
    """When a base64 source has both ``value`` and ``data``, the snapshot carries ``value``."""
    spec_payload = base64.b64encode(b"new-spec").decode("utf-8")
    legacy_payload = base64.b64encode(b"legacy").decode("utf-8")
    source = {
        "type": "base64",
        "value": spec_payload,
        "data": legacy_payload,
        "mimeType": "image/png",
    }
    user_message = {"role": "user", "content": [{"type": "image", "source": source}]}

    snapshot = agui_messages_to_snapshot_format([user_message])

    first_part = snapshot[0]["content"][0]
    assert first_part["data"] == spec_payload
def test_agui_snapshot_format_base64_data_field_backward_compat():
    """A base64 source that only has the legacy ``data`` key still populates the snapshot."""
    legacy_payload = base64.b64encode(b"legacy").decode("utf-8")
    image_part = {
        "type": "image",
        "source": {"type": "base64", "data": legacy_payload, "mimeType": "image/png"},
    }
    user_message = {"role": "user", "content": [image_part]}

    snapshot = agui_messages_to_snapshot_format([user_message])

    first_part = snapshot[0]["content"][0]
    assert first_part["data"] == legacy_payload
def test_agui_with_tool_calls_to_agent_framework():
"""Assistant message with tool_calls is converted to FunctionCallContent."""
agui_msg = {
@@ -1760,3 +1831,67 @@ class TestReasoningRoundTrip:
assert "First answer" in texts
assert "Follow-up question" in texts
assert "Prior reasoning" not in texts
def test_parse_multimodal_media_part_base64_value_field():
    """A ``base64`` source exposes its ``value`` payload in the parsed content URI."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    image_source = {"type": "base64", "value": "aGVsbG8=", "mimeType": "image/png"}
    parsed = _parse_multimodal_media_part({"type": "image", "source": image_source})

    assert parsed is not None
    assert "aGVsbG8=" in parsed.uri
def test_parse_multimodal_media_part_data_source_value_field():
    """A ``data``-typed source also exposes its ``value`` payload in the parsed content URI."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    image_source = {"type": "data", "value": "aGVsbG8=", "mimeType": "image/png"}
    parsed = _parse_multimodal_media_part({"type": "image", "source": image_source})

    assert parsed is not None
    assert "aGVsbG8=" in parsed.uri
def test_parse_multimodal_media_part_base64_data_field_backward_compat():
    """A ``base64`` source with only the deprecated ``data`` key is still parsed."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    image_source = {"type": "base64", "data": "aGVsbG8=", "mimeType": "image/png"}
    parsed = _parse_multimodal_media_part({"type": "image", "source": image_source})

    assert parsed is not None
    assert "aGVsbG8=" in parsed.uri
def test_parse_multimodal_media_part_value_preferred_over_data():
    """With both ``value`` and ``data`` on the source, the parsed URI carries ``value``."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    image_source = {
        "type": "base64",
        "value": "dmFsdWU=",
        "data": "ZGF0YQ==",
        "mimeType": "image/png",
    }
    parsed = _parse_multimodal_media_part({"type": "image", "source": image_source})

    assert parsed is not None
    # "dmFsdWU=" is the base64 encoding of "value"; it must win over the "data" payload.
    assert "dmFsdWU=" in parsed.uri
def test_parse_multimodal_media_part_unknown_source_value_fallback():
    """A source with an unrecognized ``type`` still has its ``value`` payload picked up."""
    from agent_framework_ag_ui._message_adapters import _parse_multimodal_media_part

    image_source = {"type": "custom", "value": "aGVsbG8=", "mimeType": "image/png"}
    parsed = _parse_multimodal_media_part({"type": "image", "source": image_source})

    assert parsed is not None
    assert "aGVsbG8=" in parsed.uri
+32 -1
View File
@@ -1244,7 +1244,7 @@ class TestEmitTextReasoning:
assert events[0].message_id == "reason_1"
assert isinstance(events[1], ReasoningMessageStartEvent)
assert events[1].message_id == "reason_1"
assert events[1].role == "assistant"
assert events[1].role == "reasoning"
assert isinstance(events[2], ReasoningMessageContentEvent)
assert events[2].message_id == "reason_1"
assert events[2].delta == "The user is asking about weather, so I should call the weather tool."
@@ -1642,6 +1642,37 @@ class TestReasoningInSnapshot:
assert close[0].message_id == "block2"
class TestReasoningEventRole:
    """Checks that emitted ReasoningMessageStartEvent instances carry role='reasoning'."""

    def test_reasoning_role_without_flow(self):
        """Without a FlowState, exactly one start event is emitted and its role is 'reasoning'."""
        reasoning = Content.from_text_reasoning(
            id="reason_role_1",
            text="Thinking about the question.",
        )

        start_events = [
            event
            for event in _emit_text_reasoning(reasoning)
            if isinstance(event, ReasoningMessageStartEvent)
        ]

        assert len(start_events) == 1
        assert start_events[0].role == "reasoning"

    def test_reasoning_role_with_flow(self):
        """With a fresh FlowState, exactly one start event is emitted and its role is 'reasoning'."""
        state = FlowState()
        reasoning = Content.from_text_reasoning(
            id="reason_role_2",
            text="Reasoning in streaming mode.",
        )

        start_events = [
            event
            for event in _emit_text_reasoning(reasoning, state)
            if isinstance(event, ReasoningMessageStartEvent)
        ]

        assert len(start_events) == 1
        assert start_events[0].role == "reasoning"
async def test_session_id_matches_thread_id():
"""Session created by run_agent_stream uses the client thread_id as session_id."""
from conftest import StubAgent
+4 -4
View File
@@ -84,14 +84,14 @@ wheels = [
[[package]]
name = "ag-ui-protocol"
version = "0.1.13"
version = "0.1.17"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/04/b5/fc0b65b561d00d88811c8a7d98ee735833f81554be244340950e7b65820c/ag_ui_protocol-0.1.13.tar.gz", hash = "sha256:811d7d7dcce4783dec252918f40b717ebfa559399bf6b071c4ba47c0c1e21bcb", size = 5671, upload-time = "2026-02-19T18:40:38.602Z" }
sdist = { url = "https://files.pythonhosted.org/packages/cb/0f/5a8ce5eb5cd7adf3f733da87d7a5a8a38f24ec9e029c1300f9495ae9c7fa/ag_ui_protocol-0.1.17.tar.gz", hash = "sha256:5fae4cfced8245c8ac329b85702fd166ff226d99dc1ea8a1ae95890826aa69e5", size = 6273, upload-time = "2026-04-20T21:09:19.436Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cd/9f/b833c1ab1999da35ebad54841ae85d2c2764c931da9a6f52d8541b6901b2/ag_ui_protocol-0.1.13-py3-none-any.whl", hash = "sha256:1393fa894c1e8416efe184168a50689e760d05b32f4646eebb8ff423dddf8e8f", size = 8053, upload-time = "2026-02-19T18:40:37.27Z" },
{ url = "https://files.pythonhosted.org/packages/a0/82/e5f1686b4c4e232818c75598d651b82088f584e59adef71802920d74f82f/ag_ui_protocol-0.1.17-py3-none-any.whl", hash = "sha256:6a9065590d21c7b9b8ae9bb1a3410ecf4d18cb8041a077d25574f792dfa504fe", size = 8648, upload-time = "2026-04-20T21:09:20.488Z" },
]
[[package]]
@@ -183,7 +183,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "ag-ui-protocol", specifier = "==0.1.13" },
{ name = "ag-ui-protocol", specifier = ">=0.1.16,<0.2" },
{ name = "agent-framework-core", editable = "packages/core" },
{ name = "fastapi", specifier = ">=0.115.0,<0.133.1" },
{ name = "httpx", marker = "extra == 'dev'", specifier = "==0.28.1" },