Files
agent-framework/python/packages/devui/tests/test_multimodal_workflow.py
T
Eduard van Valkenburg 838a7fd61d Python: [BREAKING] Types API Review improvements (#3647)
* Replace Role and FinishReason classes with NewType + Literal

- Remove EnumLike metaclass from _types.py
- Replace Role class with NewType('Role', str) + RoleLiteral
- Replace FinishReason class with NewType('FinishReason', str) + FinishReasonLiteral
- Update all usages across codebase to use string literals
- Remove .value access patterns (direct string comparison now works)
- Add backward compatibility for legacy dict serialization format
- Update tests to reflect new string-based types

Addresses #3591, #3615

* Simplify ChatResponse and AgentResponse type hints (#3592)

- Remove overloads from ChatResponse.__init__
- Remove text parameter from ChatResponse.__init__
- Remove | dict[str, Any] from finish_reason and usage_details params
- Remove **kwargs from AgentResponse.__init__
- Both now accept ChatMessage | Sequence[ChatMessage] | None for messages
- Update docstrings and examples to reflect changes
- Fix tests that were using removed kwargs
- Fix Role type hint usage in ag-ui utils

* Remove text parameter from ChatResponseUpdate and AgentResponseUpdate (#3597)

- Remove text parameter from ChatResponseUpdate.__init__
- Remove text parameter from AgentResponseUpdate.__init__
- Remove **kwargs from both update classes
- Simplify contents parameter type to Sequence[Content] | None
- Update all usages to use contents=[Content.from_text(...)] pattern
- Fix imports in test files
- Update docstrings and examples

* Rename from_chat_response_updates to from_updates (#3593)

- ChatResponse.from_chat_response_updates → ChatResponse.from_updates
- ChatResponse.from_chat_response_generator → ChatResponse.from_update_generator
- AgentResponse.from_agent_run_response_updates → AgentResponse.from_updates

* Remove try_parse_value method from ChatResponse and AgentResponse (#3595)

- Remove try_parse_value method from ChatResponse
- Remove try_parse_value method from AgentResponse
- Remove try_parse_value calls from from_updates and from_update_generator methods
- Update samples to use try/except with response.value instead
- Update tests to use response.value pattern
- Users should now use response.value with try/except for safe parsing

* Add agent_id to AgentResponse and clarify author_name documentation (#3596)

- Add agent_id parameter to AgentResponse class
- Document that author_name is on ChatMessage objects, not responses
- Update ChatResponse docstring with author_name note
- Update AgentResponse docstring with author_name note

* Simplify ChatMessage.__init__ signature (#3618)

- Make contents a positional argument accepting Sequence[Content | str]
- Auto-convert strings in contents to TextContent
- Remove overloads, keep text kwarg for backward compatibility with serialization
- Update _parse_content_list to handle string items
- Update all usages across codebase to use new format: ChatMessage("role", ["text"])

* Allow Content as input on run and get_response

- Update prepare_messages and normalize_messages to accept Content
- Update type signatures in _agents.py and _clients.py
- Add tests for Content input handling

* Fix ChatMessage usage across packages and samples

Update all remaining ChatMessage(role=..., text=...) to use new
ChatMessage('role', ['text']) signature.

* Fix Role string usage and response format parsing

- Fix redis provider: remove .value access on string literals
- Fix durabletask ensure_response_format: set _response_format before accessing .value

* Fix ollama .value and ai_model_id issues, handle None in content list

- Fix ollama _chat_client: remove .value on string literals
- Fix ollama _chat_client: rename ai_model_id to model_id
- Fix _parse_content_list: skip None values gracefully

* Fix A2AAgent type signature to include Content

* Fix Role/FinishReason NewType dict annotations and improve test coverage to 95%

* Fix mypy errors for Role/FinishReason NewType usage

* Fix Role.TOOL and Role.ASSISTANT usage in _orchestrator_helpers.py

* Fix Role NewType usage in durabletask _models.py
2026-02-04 10:13:23 +00:00

155 lines
6.1 KiB
Python

# Copyright (c) Microsoft. All rights reserved.
"""Test multimodal input handling for workflows.
This test verifies that workflows with AgentExecutor nodes correctly receive
multimodal content (images, files) from the DevUI frontend.
"""
import json
from unittest.mock import MagicMock
from agent_framework_devui._discovery import EntityDiscovery
from agent_framework_devui._executor import AgentFrameworkExecutor
from agent_framework_devui._mapper import MessageMapper
# Base64 payload of a small test image (a 1x1 red pixel PNG), kept inline so
# the tests do not need any fixture files on disk.
TEST_IMAGE_BASE64 = (
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
)
# The same image as a data URI; the tests use it as the `image_url` value of
# the `input_image` content part.
TEST_IMAGE_DATA_URI = "data:image/png;base64," + TEST_IMAGE_BASE64
class TestMultimodalWorkflowInput:
    """Tests for multimodal (text + image) input handling for workflows.

    Every test builds an ``AgentFrameworkExecutor`` around mocked discovery and
    mapper collaborators and calls the executor's private input-parsing
    helpers directly.
    """

    @staticmethod
    def _make_executor():
        """Return an executor wired to spec'd mocks of its two collaborators."""
        discovery = MagicMock(spec=EntityDiscovery)
        mapper = MagicMock(spec=MessageMapper)
        return AgentFrameworkExecutor(discovery, mapper)

    @staticmethod
    def _multimodal_input(text: str):
        """Build an OpenAI-format input list with one user message.

        The message carries two content parts: an ``input_text`` part holding
        ``text`` and an ``input_image`` part whose ``image_url`` is the test
        image data URI.

        Args:
            text: The text placed in the ``input_text`` part.

        Returns:
            A one-element list containing the message dict.
        """
        return [
            {
                "type": "message",
                "role": "user",
                "content": [
                    {"type": "input_text", "text": text},
                    {"type": "input_image", "image_url": TEST_IMAGE_DATA_URI},
                ],
            }
        ]

    def test_is_openai_multimodal_format_detects_message_format(self):
        """Test that _is_openai_multimodal_format correctly detects OpenAI format."""
        executor = self._make_executor()

        # Valid OpenAI multimodal format
        valid_format = self._multimodal_input("Describe this image")
        assert executor._is_openai_multimodal_format(valid_format) is True

        # Invalid formats (kept as separate asserts so a failure names the case)
        assert executor._is_openai_multimodal_format({}) is False  # dict, not list
        assert executor._is_openai_multimodal_format([]) is False  # empty list
        assert executor._is_openai_multimodal_format("hello") is False  # string
        assert executor._is_openai_multimodal_format([{"type": "other"}]) is False  # wrong type
        assert executor._is_openai_multimodal_format([{"foo": "bar"}]) is False  # no type field

    def test_convert_openai_input_to_chat_message_with_image(self):
        """Test that OpenAI format with image is converted to a ChatMessage with data content."""
        from agent_framework import ChatMessage

        executor = self._make_executor()

        # OpenAI format input with text and image (as sent by frontend)
        openai_input = self._multimodal_input("Describe this image")

        # Convert to ChatMessage
        result = executor._convert_input_to_chat_message(openai_input)

        # Verify result is ChatMessage
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"
        assert result.role == "user"

        # Verify contents
        assert len(result.contents) == 2, f"Expected 2 contents, got {len(result.contents)}"

        # First content should be text
        assert result.contents[0].type == "text"
        assert result.contents[0].text == "Describe this image"

        # Second content should be the image, carried as "data" content
        assert result.contents[1].type == "data"
        assert result.contents[1].media_type == "image/png"
        assert result.contents[1].uri == TEST_IMAGE_DATA_URI

    def test_parse_workflow_input_handles_json_string_with_multimodal(self):
        """Test that _parse_workflow_input correctly handles JSON string with multimodal content."""
        import asyncio

        from agent_framework import ChatMessage

        executor = self._make_executor()

        # This is what the frontend sends: JSON stringified OpenAI format
        json_string_input = json.dumps(self._multimodal_input("What is in this image?"))

        # Mock workflow
        mock_workflow = MagicMock()

        # _parse_workflow_input is a coroutine function; drive it to completion here
        result = asyncio.run(executor._parse_workflow_input(mock_workflow, json_string_input))

        # Verify result is ChatMessage with multimodal content
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"
        assert len(result.contents) == 2

        # Verify text content
        assert result.contents[0].type == "text"
        assert result.contents[0].text == "What is in this image?"

        # Verify image content
        assert result.contents[1].type == "data"
        assert result.contents[1].media_type == "image/png"

    def test_parse_workflow_input_still_handles_simple_dict(self):
        """Test that simple dict input still works (backward compatibility)."""
        import asyncio

        from agent_framework import ChatMessage

        executor = self._make_executor()

        # Simple dict input (old format)
        simple_input = {"text": "Hello world", "role": "user"}
        json_string_input = json.dumps(simple_input)

        # Mock workflow whose start executor declares ChatMessage as its input type
        mock_workflow = MagicMock()
        mock_executor = MagicMock()
        mock_executor.input_types = [ChatMessage]
        mock_workflow.get_start_executor.return_value = mock_executor

        # Parse the input
        result = asyncio.run(executor._parse_workflow_input(mock_workflow, json_string_input))

        # Result should be ChatMessage (from _parse_structured_workflow_input)
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"