mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
45527eed29
Merged and refactored eval module per Eduard's PR review: - Merge _eval.py + _local_eval.py into single _evaluation.py - Convert EvalItem from dataclass to regular class - Rename to_dict() to to_eval_data() - Convert _AgentEvalData to TypedDict - Simplify check system: unified async pattern with isawaitable - Parallelize checks and evaluators with asyncio.gather - Add all/any mode to tool_called_check - Fix bool(passed) truthy bug in _coerce_result - Remove deprecated function_evaluator/async_function_evaluator aliases - Remove _MinimalAgent, tighten evaluate_agent signature - Set self.name in __init__ (LocalEvaluator, FoundryEvals) - Limit FoundryEvals to AsyncOpenAI only - Type project_client as AIProjectClient - Remove NotImplementedError continuous eval code - Add evaluation samples in 02-agents/ and 03-workflows/ - Update all imports and tests (167 passing) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
61 lines
1.7 KiB
Python
61 lines
1.7 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
|
|
"""Evaluate a multi-agent workflow with per-agent breakdown.

Demonstrates workflow evaluation:
1. Build a simple two-agent workflow
2. Call evaluate_workflow(), which runs the workflow and evaluates each agent
3. Inspect per-agent results in sub_results

Usage:
    uv run python samples/03-workflows/evaluation/evaluate_workflow.py
"""
|
|
|
|
import asyncio
|
|
|
|
from agent_framework import (
|
|
Agent,
|
|
AgentExecutor,
|
|
LocalEvaluator,
|
|
WorkflowBuilder,
|
|
evaluate_workflow,
|
|
evaluator,
|
|
keyword_check,
|
|
)
|
|
|
|
|
|
@evaluator
def is_nonempty(response: str) -> bool:
    """Return True when the response is non-trivial.

    Surrounding whitespace is ignored; the response counts as non-trivial
    only if more than five characters remain after stripping.
    """
    trimmed = response.strip()
    return len(trimmed) > 5
|
|
|
|
|
|
async def main() -> None:
    """Build a planner → booker workflow, evaluate it, and print the scores.

    Two agents are wrapped in executors and chained with a single edge. The
    resulting workflow is passed to ``evaluate_workflow`` together with a
    ``LocalEvaluator``, and each returned result is printed with its overall
    pass count followed by the per-agent counts found in ``sub_results``.
    """
    # Build a simple planner → executor workflow
    planner = Agent(
        model="gpt-4o-mini",
        instructions="You plan trips. Output a bullet-point plan.",
    )
    executor_agent = Agent(
        model="gpt-4o-mini",
        instructions="You execute travel plans. Book the items listed.",
    )

    # Register each agent under its executor id, then wire planner -> booker.
    builder = WorkflowBuilder()
    for executor_id, agent in (("planner", planner), ("booker", executor_agent)):
        builder.add_executor(AgentExecutor(executor_id, agent))
    builder.add_edge("planner", "booker")
    workflow = builder.build()

    # Evaluate with per-agent breakdown
    checks = LocalEvaluator(is_nonempty, keyword_check("plan", "trip"))
    reports = await evaluate_workflow(
        workflow=workflow,
        queries=["Plan a weekend trip to Paris"],
        evaluators=checks,
    )

    for report in reports:
        # Overall tally first, then one line per agent.
        print(f"{report.provider}: {report.passed}/{report.total} passed (overall)")
        for agent_name, sub in report.sub_results.items():
            print(f" {agent_name}: {sub.passed}/{sub.total}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the sample only when executed as a script, not when imported.
    asyncio.run(main())
|