mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
838a7fd61d
* Replace Role and FinishReason classes with NewType + Literal
- Remove EnumLike metaclass from _types.py
- Replace Role class with NewType('Role', str) + RoleLiteral
- Replace FinishReason class with NewType('FinishReason', str) + FinishReasonLiteral
- Update all usages across codebase to use string literals
- Remove .value access patterns (direct string comparison now works)
- Add backward compatibility for legacy dict serialization format
- Update tests to reflect new string-based types
Addresses #3591, #3615
* Simplify ChatResponse and AgentResponse type hints (#3592)
- Remove overloads from ChatResponse.__init__
- Remove text parameter from ChatResponse.__init__
- Remove | dict[str, Any] from finish_reason and usage_details params
- Remove **kwargs from AgentResponse.__init__
- Both now accept ChatMessage | Sequence[ChatMessage] | None for messages
- Update docstrings and examples to reflect changes
- Fix tests that were using removed kwargs
- Fix Role type hint usage in ag-ui utils
* Remove text parameter from ChatResponseUpdate and AgentResponseUpdate (#3597)
- Remove text parameter from ChatResponseUpdate.__init__
- Remove text parameter from AgentResponseUpdate.__init__
- Remove **kwargs from both update classes
- Simplify contents parameter type to Sequence[Content] | None
- Update all usages to use contents=[Content.from_text(...)] pattern
- Fix imports in test files
- Update docstrings and examples
* Rename from_chat_response_updates to from_updates (#3593)
- ChatResponse.from_chat_response_updates → ChatResponse.from_updates
- ChatResponse.from_chat_response_generator → ChatResponse.from_update_generator
- AgentResponse.from_agent_run_response_updates → AgentResponse.from_updates
* Remove try_parse_value method from ChatResponse and AgentResponse (#3595)
- Remove try_parse_value method from ChatResponse
- Remove try_parse_value method from AgentResponse
- Remove try_parse_value calls from from_updates and from_update_generator methods
- Update samples to use try/except with response.value instead
- Update tests to use response.value pattern
- Users should now use response.value with try/except for safe parsing
* Add agent_id to AgentResponse and clarify author_name documentation (#3596)
- Add agent_id parameter to AgentResponse class
- Document that author_name is on ChatMessage objects, not responses
- Update ChatResponse docstring with author_name note
- Update AgentResponse docstring with author_name note
* Simplify ChatMessage.__init__ signature (#3618)
- Make contents a positional argument accepting Sequence[Content | str]
- Auto-convert strings in contents to TextContent
- Remove overloads, keep text kwarg for backward compatibility with serialization
- Update _parse_content_list to handle string items
- Update all usages across codebase to use new format: ChatMessage("role", ["text"])
* Allow Content as input on run and get_response
- Update prepare_messages and normalize_messages to accept Content
- Update type signatures in _agents.py and _clients.py
- Add tests for Content input handling
* Fix ChatMessage usage across packages and samples
Update all remaining ChatMessage(role=..., text=...) to use new
ChatMessage('role', ['text']) signature.
* Fix Role string usage and response format parsing
- Fix redis provider: remove .value access on string literals
- Fix durabletask ensure_response_format: set _response_format before accessing .value
* Fix ollama .value and ai_model_id issues, handle None in content list
- Fix ollama _chat_client: remove .value on string literals
- Fix ollama _chat_client: rename ai_model_id to model_id
- Fix _parse_content_list: skip None values gracefully
* Fix A2AAgent type signature to include Content
* Fix Role/FinishReason NewType dict annotations and improve test coverage to 95%
* Fix mypy errors for Role/FinishReason NewType usage
* Fix Role.TOOL and Role.ASSISTANT usage in _orchestrator_helpers.py
* Fix Role NewType usage in durabletask _models.py
155 lines
6.1 KiB
Python
155 lines
6.1 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
|
|
"""Test multimodal input handling for workflows.
|
|
|
|
These tests verify that workflows with AgentExecutor nodes correctly receive
multimodal content (images, files) from the DevUI frontend.
|
|
"""
|
|
|
|
import json
|
|
from unittest.mock import MagicMock
|
|
|
|
from agent_framework_devui._discovery import EntityDiscovery
|
|
from agent_framework_devui._executor import AgentFrameworkExecutor
|
|
from agent_framework_devui._mapper import MessageMapper
|
|
|
|
# Base64 payload of a tiny PNG used as the image fixture
# (described by the original author as a 1x1 red pixel).
TEST_IMAGE_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
# The same payload wrapped as a data URI, the form the tests pass as "image_url".
TEST_IMAGE_DATA_URI = "data:image/png;base64," + TEST_IMAGE_BASE64
|
|
|
|
|
|
class TestMultimodalWorkflowInput:
    """Checks on how the DevUI executor detects and converts multimodal workflow input."""

    @staticmethod
    def _build_executor():
        """Return an AgentFrameworkExecutor built from spec'd mock collaborators."""
        return AgentFrameworkExecutor(
            MagicMock(spec=EntityDiscovery),
            MagicMock(spec=MessageMapper),
        )

    @staticmethod
    def _user_message_with_image(prompt):
        """Build an OpenAI-style input list: one user message with text plus the test image."""
        return [
            {
                "type": "message",
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt},
                    {"type": "input_image", "image_url": TEST_IMAGE_DATA_URI},
                ],
            }
        ]

    def test_is_openai_multimodal_format_detects_message_format(self):
        """_is_openai_multimodal_format accepts a message list and rejects other shapes."""
        detect = self._build_executor()._is_openai_multimodal_format

        # A list holding a "message" item with text and image parts is recognised.
        assert detect(self._user_message_with_image("Describe this image")) is True

        # Everything below must be reported as not being the OpenAI format.
        rejected_inputs = (
            {},  # dict, not list
            [],  # empty list
            "hello",  # string
            [{"type": "other"}],  # wrong type
            [{"foo": "bar"}],  # no type field
        )
        for candidate in rejected_inputs:
            assert detect(candidate) is False

    def test_convert_openai_input_to_chat_message_with_image(self):
        """An OpenAI-format text+image input converts to a ChatMessage with text and data contents."""
        from agent_framework import ChatMessage

        # Same structure the frontend sends: one user message, text part then image part.
        payload = self._user_message_with_image("Describe this image")

        result = self._build_executor()._convert_input_to_chat_message(payload)

        # The conversion yields a user ChatMessage...
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"
        assert result.role == "user"

        # ...carrying exactly one entry per input part.
        assert len(result.contents) == 2, f"Expected 2 contents, got {len(result.contents)}"
        text_part, image_part = result.contents

        # The text part comes first.
        assert text_part.type == "text"
        assert text_part.text == "Describe this image"

        # The image part follows as data content that keeps the original data URI.
        assert image_part.type == "data"
        assert image_part.media_type == "image/png"
        assert image_part.uri == TEST_IMAGE_DATA_URI

    def test_parse_workflow_input_handles_json_string_with_multimodal(self):
        """_parse_workflow_input turns a JSON-encoded multimodal payload into a ChatMessage."""
        import asyncio

        from agent_framework import ChatMessage

        # The frontend submits the OpenAI-format list serialised as a JSON string.
        serialized = json.dumps(self._user_message_with_image("What is in this image?"))

        # A bare mock stands in for the workflow.
        workflow = MagicMock()

        result = asyncio.run(self._build_executor()._parse_workflow_input(workflow, serialized))

        # The parsed value is a ChatMessage holding both parts.
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"
        assert len(result.contents) == 2
        text_part, image_part = result.contents

        # Text part.
        assert text_part.type == "text"
        assert text_part.text == "What is in this image?"

        # Image part.
        assert image_part.type == "data"
        assert image_part.media_type == "image/png"

    def test_parse_workflow_input_still_handles_simple_dict(self):
        """A plain JSON dict input is still parsed into a ChatMessage (backward compatibility)."""
        import asyncio

        from agent_framework import ChatMessage

        # Old-style input: a flat dict, serialised to JSON.
        serialized = json.dumps({"text": "Hello world", "role": "user"})

        # Mock workflow whose start executor declares ChatMessage as its input type.
        start_executor = MagicMock()
        start_executor.input_types = [ChatMessage]
        workflow = MagicMock()
        workflow.get_start_executor.return_value = start_executor

        result = asyncio.run(self._build_executor()._parse_workflow_input(workflow, serialized))

        # Result should be ChatMessage (from _parse_structured_workflow_input)
        assert isinstance(result, ChatMessage), f"Expected ChatMessage, got {type(result)}"