mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
40ab6e9d67
* name changes executed * updated adr to accepted * renamed openai base config * renamed openai config to mixin * added renames in user docs * reverted mcperror * fix tests * remove sse from tests
450 lines
18 KiB
Python
450 lines
18 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
|
|
from collections.abc import AsyncIterable
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
import pytest
|
|
from agent_framework import (
|
|
AgentRunResponse,
|
|
AgentRunResponseUpdate,
|
|
ChatMessage,
|
|
ChatResponse,
|
|
ChatResponseUpdate,
|
|
Role,
|
|
TextContent,
|
|
)
|
|
from agent_framework._agents import BaseAgent
|
|
from agent_framework._clients import ChatClientProtocol as AFChatClient
|
|
|
|
from agent_framework_workflow import (
|
|
Executor,
|
|
MagenticBuilder,
|
|
MagenticManagerBase,
|
|
MagenticPlanReviewDecision,
|
|
MagenticPlanReviewReply,
|
|
MagenticPlanReviewRequest,
|
|
MagenticProgressLedger,
|
|
MagenticProgressLedgerItem,
|
|
RequestInfoEvent,
|
|
WorkflowCompletedEvent,
|
|
WorkflowContext,
|
|
WorkflowEvent, # type: ignore # noqa: E402
|
|
handler,
|
|
)
|
|
from agent_framework_workflow._magentic import (
|
|
MagenticContext,
|
|
MagenticStartMessage,
|
|
)
|
|
|
|
|
|
def test_magentic_start_message_from_string():
|
|
msg = MagenticStartMessage.from_string("Do the thing")
|
|
assert isinstance(msg, MagenticStartMessage)
|
|
assert isinstance(msg.task, ChatMessage)
|
|
assert msg.task.role == Role.USER
|
|
assert msg.task.text == "Do the thing"
|
|
|
|
|
|
def test_plan_review_request_defaults_and_reply_variants():
|
|
req = MagenticPlanReviewRequest() # defaults provided by dataclass
|
|
assert hasattr(req, "request_id")
|
|
assert req.task_text == "" and req.facts_text == "" and req.plan_text == ""
|
|
assert isinstance(req.round_index, int) and req.round_index == 0
|
|
|
|
# Replies: approve, revise with comments, revise with edited text
|
|
approve = MagenticPlanReviewReply(decision=MagenticPlanReviewDecision.APPROVE)
|
|
revise_comments = MagenticPlanReviewReply(decision=MagenticPlanReviewDecision.REVISE, comments="Tighten scope")
|
|
revise_text = MagenticPlanReviewReply(
|
|
decision=MagenticPlanReviewDecision.REVISE,
|
|
edited_plan_text="- Step 1\n- Step 2",
|
|
)
|
|
|
|
assert approve.decision == MagenticPlanReviewDecision.APPROVE
|
|
assert revise_comments.comments == "Tighten scope"
|
|
assert revise_text.edited_plan_text is not None and revise_text.edited_plan_text.startswith("- Step 1")
|
|
|
|
|
|
def test_magentic_context_reset_behavior():
|
|
ctx = MagenticContext(
|
|
task=ChatMessage(role=Role.USER, text="task"),
|
|
participant_descriptions={"Alice": "Researcher"},
|
|
)
|
|
# seed context state
|
|
ctx.chat_history.append(ChatMessage(role=Role.ASSISTANT, text="draft"))
|
|
ctx.stall_count = 2
|
|
prev_reset = ctx.reset_count
|
|
|
|
ctx.reset()
|
|
|
|
assert ctx.chat_history == []
|
|
assert ctx.stall_count == 0
|
|
assert ctx.reset_count == prev_reset + 1
|
|
|
|
|
|
@dataclass
|
|
class _SimpleLedger:
|
|
facts: ChatMessage
|
|
plan: ChatMessage
|
|
|
|
|
|
class FakeManager(MagenticManagerBase):
|
|
"""Deterministic manager for tests that avoids real LLM calls."""
|
|
|
|
task_ledger: _SimpleLedger | None = None
|
|
satisfied_after_signoff: bool = True
|
|
next_speaker_name: str = "agentA"
|
|
instruction_text: str = "Proceed with step 1"
|
|
|
|
async def plan(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
facts = ChatMessage(role=Role.ASSISTANT, text="GIVEN OR VERIFIED FACTS\n- A\n")
|
|
plan = ChatMessage(role=Role.ASSISTANT, text="- Do X\n- Do Y\n")
|
|
self.task_ledger = _SimpleLedger(facts=facts, plan=plan)
|
|
combined = f"Task: {magentic_context.task.text}\n\nFacts:\n{facts.text}\n\nPlan:\n{plan.text}"
|
|
return ChatMessage(role=Role.ASSISTANT, text=combined, author_name="magentic_manager")
|
|
|
|
async def replan(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
facts = ChatMessage(role=Role.ASSISTANT, text="GIVEN OR VERIFIED FACTS\n- A2\n")
|
|
plan = ChatMessage(role=Role.ASSISTANT, text="- Do Z\n")
|
|
self.task_ledger = _SimpleLedger(facts=facts, plan=plan)
|
|
combined = f"Task: {magentic_context.task.text}\n\nFacts:\n{facts.text}\n\nPlan:\n{plan.text}"
|
|
return ChatMessage(role=Role.ASSISTANT, text=combined, author_name="magentic_manager")
|
|
|
|
async def create_progress_ledger(self, magentic_context: MagenticContext) -> MagenticProgressLedger:
|
|
is_satisfied = self.satisfied_after_signoff and len(magentic_context.chat_history) > 0
|
|
return MagenticProgressLedger(
|
|
is_request_satisfied=MagenticProgressLedgerItem(reason="test", answer=is_satisfied),
|
|
is_in_loop=MagenticProgressLedgerItem(reason="test", answer=False),
|
|
is_progress_being_made=MagenticProgressLedgerItem(reason="test", answer=True),
|
|
next_speaker=MagenticProgressLedgerItem(reason="test", answer=self.next_speaker_name),
|
|
instruction_or_question=MagenticProgressLedgerItem(reason="test", answer=self.instruction_text),
|
|
)
|
|
|
|
async def prepare_final_answer(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
return ChatMessage(role=Role.ASSISTANT, text="FINAL", author_name="magentic_manager")
|
|
|
|
|
|
async def test_standard_manager_plan_and_replan_combined_ledger():
|
|
manager = FakeManager(max_round_count=10, max_stall_count=3, max_reset_count=2)
|
|
ctx = MagenticContext(
|
|
task=ChatMessage(role=Role.USER, text="demo task"),
|
|
participant_descriptions={"agentA": "Agent A"},
|
|
)
|
|
|
|
first = await manager.plan(ctx.model_copy(deep=True))
|
|
assert first.role == Role.ASSISTANT and "Facts:" in first.text and "Plan:" in first.text
|
|
assert manager.task_ledger is not None
|
|
|
|
replanned = await manager.replan(ctx.model_copy(deep=True))
|
|
assert "A2" in replanned.text or "Do Z" in replanned.text
|
|
|
|
|
|
async def test_standard_manager_progress_ledger_and_fallback():
|
|
manager = FakeManager(max_round_count=10)
|
|
ctx = MagenticContext(
|
|
task=ChatMessage(role=Role.USER, text="demo"),
|
|
participant_descriptions={"agentA": "Agent A"},
|
|
)
|
|
|
|
ledger = await manager.create_progress_ledger(ctx.model_copy(deep=True))
|
|
assert isinstance(ledger, MagenticProgressLedger)
|
|
assert ledger.next_speaker.answer == "agentA"
|
|
|
|
manager.satisfied_after_signoff = False
|
|
ledger2 = await manager.create_progress_ledger(ctx.model_copy(deep=True))
|
|
assert ledger2.is_request_satisfied.answer is False
|
|
|
|
|
|
async def test_magentic_workflow_plan_review_approval_to_completion():
|
|
manager = FakeManager(max_round_count=10)
|
|
wf = (
|
|
MagenticBuilder()
|
|
.participants(agentA=_DummyExec("agentA"))
|
|
.with_standard_manager(manager)
|
|
.with_plan_review()
|
|
.build()
|
|
)
|
|
|
|
req_event: RequestInfoEvent | None = None
|
|
async for ev in wf.run_stream("do work"):
|
|
if isinstance(ev, RequestInfoEvent) and ev.request_type is MagenticPlanReviewRequest:
|
|
req_event = ev
|
|
assert req_event is not None
|
|
|
|
completed: WorkflowCompletedEvent | None = None
|
|
async for ev in wf.send_responses_streaming({
|
|
req_event.request_id: MagenticPlanReviewReply(decision=MagenticPlanReviewDecision.APPROVE)
|
|
}):
|
|
if isinstance(ev, WorkflowCompletedEvent):
|
|
completed = ev
|
|
break
|
|
assert completed is not None
|
|
assert isinstance(getattr(completed, "data", None), ChatMessage)
|
|
|
|
|
|
async def test_magentic_plan_review_approve_with_comments_replans_and_proceeds():
|
|
class CountingManager(FakeManager):
|
|
# Declare as a model field so assignment is allowed under Pydantic
|
|
replan_count: int = 0
|
|
|
|
def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def]
|
|
super().__init__(*args, **kwargs)
|
|
|
|
async def replan(self, magentic_context: MagenticContext) -> ChatMessage: # type: ignore[override]
|
|
self.replan_count += 1
|
|
return await super().replan(magentic_context)
|
|
|
|
manager = CountingManager(max_round_count=10)
|
|
wf = (
|
|
MagenticBuilder()
|
|
.participants(agentA=_DummyExec("agentA"))
|
|
.with_standard_manager(manager)
|
|
.with_plan_review()
|
|
.build()
|
|
)
|
|
|
|
# Wait for the initial plan review request
|
|
req_event: RequestInfoEvent | None = None
|
|
async for ev in wf.run_stream("do work"):
|
|
if isinstance(ev, RequestInfoEvent) and ev.request_type is MagenticPlanReviewRequest:
|
|
req_event = ev
|
|
assert req_event is not None
|
|
|
|
# Reply APPROVE with comments (no edited text). Expect one replan and no second review round.
|
|
saw_second_review = False
|
|
completed: WorkflowCompletedEvent | None = None
|
|
async for ev in wf.send_responses_streaming({
|
|
req_event.request_id: MagenticPlanReviewReply(
|
|
decision=MagenticPlanReviewDecision.APPROVE,
|
|
comments="Looks good; consider Z",
|
|
)
|
|
}):
|
|
if isinstance(ev, RequestInfoEvent) and ev.request_type is MagenticPlanReviewRequest:
|
|
saw_second_review = True
|
|
if isinstance(ev, WorkflowCompletedEvent):
|
|
completed = ev
|
|
break
|
|
|
|
assert completed is not None
|
|
assert manager.replan_count >= 1
|
|
assert saw_second_review is False
|
|
# Replan from FakeManager updates facts/plan to include A2 / Do Z
|
|
assert manager.task_ledger is not None
|
|
combined_text = (manager.task_ledger.facts.text or "") + (manager.task_ledger.plan.text or "")
|
|
assert ("A2" in combined_text) or ("Do Z" in combined_text)
|
|
|
|
|
|
async def test_magentic_orchestrator_round_limit_produces_partial_result():
|
|
manager = FakeManager(max_round_count=1)
|
|
manager.satisfied_after_signoff = False
|
|
wf = MagenticBuilder().participants(agentA=_DummyExec("agentA")).with_standard_manager(manager).build()
|
|
|
|
from agent_framework_workflow import WorkflowEvent # type: ignore
|
|
|
|
events: list[WorkflowEvent] = []
|
|
async for ev in wf.run_stream("round limit test"):
|
|
events.append(ev)
|
|
if len(events) > 50:
|
|
break
|
|
|
|
completed = next((e for e in events if isinstance(e, WorkflowCompletedEvent)), None)
|
|
assert completed is not None
|
|
data = getattr(completed, "data", None)
|
|
assert isinstance(data, ChatMessage)
|
|
assert data.role == Role.ASSISTANT
|
|
|
|
|
|
class _DummyExec(Executor):
|
|
def __init__(self, name: str) -> None:
|
|
super().__init__(name)
|
|
|
|
@handler
|
|
async def _noop(self, message: object, ctx: WorkflowContext[object]) -> None: # pragma: no cover - not called
|
|
pass
|
|
|
|
|
|
from agent_framework_workflow import StandardMagenticManager # noqa: E402
|
|
|
|
|
|
class _StubChatClient(AFChatClient):
|
|
async def get_response(self, messages, **kwargs): # type: ignore[override]
|
|
return ChatResponse(messages=[ChatMessage(role=Role.ASSISTANT, text="ok")])
|
|
|
|
def get_streaming_response(self, messages, **kwargs) -> AsyncIterable[ChatResponseUpdate]: # type: ignore[override]
|
|
async def _gen():
|
|
if False:
|
|
yield ChatResponseUpdate() # pragma: no cover
|
|
|
|
return _gen()
|
|
|
|
|
|
async def test_standard_manager_plan_and_replan_via_complete_monkeypatch():
|
|
mgr = StandardMagenticManager(chat_client=_StubChatClient())
|
|
|
|
async def fake_complete_plan(messages: list[ChatMessage], **kwargs: Any) -> ChatMessage:
|
|
# Return a different response depending on call order length
|
|
if any("FACTS" in (m.text or "") for m in messages):
|
|
return ChatMessage(role=Role.ASSISTANT, text="- step A\n- step B")
|
|
return ChatMessage(role=Role.ASSISTANT, text="GIVEN OR VERIFIED FACTS\n- fact1")
|
|
|
|
# First, patch to produce facts then plan
|
|
mgr._complete = fake_complete_plan # type: ignore[attr-defined]
|
|
|
|
ctx = MagenticContext(
|
|
task=ChatMessage(role=Role.USER, text="T"),
|
|
participant_descriptions={"A": "desc"},
|
|
)
|
|
combined = await mgr.plan(ctx.model_copy(deep=True))
|
|
# Assert structural headings and that steps appear in the combined ledger output.
|
|
assert "We are working to address the following user request:" in combined.text
|
|
assert "Here is the plan to follow as best as possible:" in combined.text
|
|
assert any(t in combined.text for t in ("- step A", "- step B", "- step"))
|
|
|
|
# Now replan with new outputs
|
|
async def fake_complete_replan(messages: list[ChatMessage], **kwargs: Any) -> ChatMessage:
|
|
if any("Please briefly explain" in (m.text or "") for m in messages):
|
|
return ChatMessage(role=Role.ASSISTANT, text="- new step")
|
|
return ChatMessage(role=Role.ASSISTANT, text="GIVEN OR VERIFIED FACTS\n- updated")
|
|
|
|
mgr._complete = fake_complete_replan # type: ignore[attr-defined]
|
|
combined2 = await mgr.replan(ctx.model_copy(deep=True))
|
|
assert "updated" in combined2.text or "new step" in combined2.text
|
|
|
|
|
|
async def test_standard_manager_progress_ledger_success_and_error():
|
|
mgr = StandardMagenticManager(chat_client=_StubChatClient())
|
|
ctx = MagenticContext(
|
|
task=ChatMessage(role=Role.USER, text="task"),
|
|
participant_descriptions={"alice": "desc"},
|
|
)
|
|
|
|
# Success path: valid JSON
|
|
async def fake_complete_ok(messages: list[ChatMessage], **kwargs: Any) -> ChatMessage:
|
|
json_text = (
|
|
'{"is_request_satisfied": {"reason": "r", "answer": false}, '
|
|
'"is_in_loop": {"reason": "r", "answer": false}, '
|
|
'"is_progress_being_made": {"reason": "r", "answer": true}, '
|
|
'"next_speaker": {"reason": "r", "answer": "alice"}, '
|
|
'"instruction_or_question": {"reason": "r", "answer": "do"}}'
|
|
)
|
|
return ChatMessage(role=Role.ASSISTANT, text=json_text)
|
|
|
|
mgr._complete = fake_complete_ok # type: ignore[attr-defined]
|
|
ledger = await mgr.create_progress_ledger(ctx.model_copy(deep=True))
|
|
assert ledger.next_speaker.answer == "alice"
|
|
|
|
# Error path: invalid JSON now raises to avoid emitting planner-oriented instructions to agents
|
|
async def fake_complete_bad(messages: list[ChatMessage], **kwargs: Any) -> ChatMessage:
|
|
return ChatMessage(role=Role.ASSISTANT, text="not-json")
|
|
|
|
mgr._complete = fake_complete_bad # type: ignore[attr-defined]
|
|
with pytest.raises(RuntimeError):
|
|
await mgr.create_progress_ledger(ctx.model_copy(deep=True))
|
|
|
|
|
|
class InvokeOnceManager(MagenticManagerBase):
|
|
def __init__(self) -> None:
|
|
super().__init__(max_round_count=5, max_stall_count=3, max_reset_count=2)
|
|
self._invoked = False
|
|
|
|
async def plan(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
return ChatMessage(role=Role.ASSISTANT, text="ledger")
|
|
|
|
async def replan(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
return ChatMessage(role=Role.ASSISTANT, text="re-ledger")
|
|
|
|
async def create_progress_ledger(self, magentic_context: MagenticContext) -> MagenticProgressLedger:
|
|
if not self._invoked:
|
|
# First round: ask agentA to respond
|
|
self._invoked = True
|
|
return MagenticProgressLedger(
|
|
is_request_satisfied=MagenticProgressLedgerItem(reason="r", answer=False),
|
|
is_in_loop=MagenticProgressLedgerItem(reason="r", answer=False),
|
|
is_progress_being_made=MagenticProgressLedgerItem(reason="r", answer=True),
|
|
next_speaker=MagenticProgressLedgerItem(reason="r", answer="agentA"),
|
|
instruction_or_question=MagenticProgressLedgerItem(reason="r", answer="say hi"),
|
|
)
|
|
# Next round: mark satisfied so run can conclude
|
|
return MagenticProgressLedger(
|
|
is_request_satisfied=MagenticProgressLedgerItem(reason="r", answer=True),
|
|
is_in_loop=MagenticProgressLedgerItem(reason="r", answer=False),
|
|
is_progress_being_made=MagenticProgressLedgerItem(reason="r", answer=True),
|
|
next_speaker=MagenticProgressLedgerItem(reason="r", answer="agentA"),
|
|
instruction_or_question=MagenticProgressLedgerItem(reason="r", answer="done"),
|
|
)
|
|
|
|
async def prepare_final_answer(self, magentic_context: MagenticContext) -> ChatMessage:
|
|
return ChatMessage(role=Role.ASSISTANT, text="final")
|
|
|
|
|
|
class StubThreadAgent(BaseAgent):
|
|
async def run_stream(self, messages=None, *, thread=None, **kwargs): # type: ignore[override]
|
|
yield AgentRunResponseUpdate(
|
|
contents=[TextContent(text="thread-ok")],
|
|
author_name="agentA",
|
|
role=Role.ASSISTANT,
|
|
)
|
|
|
|
async def run(self, messages=None, *, thread=None, **kwargs): # type: ignore[override]
|
|
return AgentRunResponse(messages=[ChatMessage(role=Role.ASSISTANT, text="thread-ok", author_name="agentA")])
|
|
|
|
|
|
class StubAssistantsClient:
|
|
pass # class name used for branch detection
|
|
|
|
|
|
class StubAssistantsAgent(BaseAgent):
|
|
chat_client: object | None = None # allow assignment via Pydantic field
|
|
|
|
def __init__(self) -> None:
|
|
super().__init__()
|
|
self.chat_client = StubAssistantsClient() # type name contains 'AssistantsClient'
|
|
|
|
async def run_stream(self, messages=None, *, thread=None, **kwargs): # type: ignore[override]
|
|
yield AgentRunResponseUpdate(
|
|
contents=[TextContent(text="assistants-ok")],
|
|
author_name="agentA",
|
|
role=Role.ASSISTANT,
|
|
)
|
|
|
|
async def run(self, messages=None, *, thread=None, **kwargs): # type: ignore[override]
|
|
return AgentRunResponse(messages=[ChatMessage(role=Role.ASSISTANT, text="assistants-ok", author_name="agentA")])
|
|
|
|
|
|
async def _collect_agent_responses_setup(participant_obj: object):
|
|
captured: list[ChatMessage] = []
|
|
|
|
async def sink(event) -> None: # type: ignore[no-untyped-def]
|
|
from agent_framework_workflow._magentic import MagenticAgentMessageEvent
|
|
|
|
if isinstance(event, MagenticAgentMessageEvent) and event.message is not None:
|
|
captured.append(event.message)
|
|
|
|
wf = (
|
|
MagenticBuilder()
|
|
.participants(agentA=participant_obj) # type: ignore[arg-type]
|
|
.with_standard_manager(InvokeOnceManager())
|
|
.on_event(sink) # type: ignore
|
|
.build()
|
|
)
|
|
|
|
# Run a bounded stream to allow one invoke and then completion
|
|
events: list[WorkflowEvent] = []
|
|
async for ev in wf.run_stream("task"): # plan review disabled
|
|
events.append(ev)
|
|
if len(events) > 50:
|
|
break
|
|
|
|
return captured
|
|
|
|
|
|
async def test_agent_executor_invoke_with_thread_chat_client():
|
|
captured = await _collect_agent_responses_setup(StubThreadAgent())
|
|
# Should have at least one response from agentA via MagenticAgentExecutor path
|
|
assert any((m.author_name == "agentA" and "ok" in (m.text or "")) for m in captured)
|
|
|
|
|
|
async def test_agent_executor_invoke_with_assistants_client_messages():
|
|
captured = await _collect_agent_responses_setup(StubAssistantsAgent())
|
|
assert any((m.author_name == "agentA" and "ok" in (m.text or "")) for m in captured)
|