// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;

namespace Microsoft.Agents.AI.Workflows.UnitTests;

/// <summary>
/// Tests for <see cref="WorkflowEvaluationExtensions"/>.
/// </summary>
/// <remarks>
/// Covers three entry points: <c>ExtractAgentData</c> (pairing invoked/completed
/// executor events into per-executor evaluation items), <c>BuildOverallItem</c>
/// (building a single workflow-level item, optionally stamped with an expected
/// output), and the <c>EvaluateAsync</c> extension on a completed run.
/// </remarks>
public sealed class WorkflowEvaluationTests
{
    // ---------------------------------------------------------------
    // ExtractAgentData tests
    // ---------------------------------------------------------------

    [Fact]
    public void ExtractAgentData_EmptyEvents_ReturnsEmpty()
    {
        var result = WorkflowEvaluationExtensions.ExtractAgentData(new List<WorkflowEvent>(), splitter: null);

        Assert.Empty(result);
    }

    [Fact]
    public void ExtractAgentData_MatchedPair_ReturnsItem()
    {
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "What is the weather?"),
            new ExecutorCompletedEvent("agent-1", "It's sunny."),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.True(result.ContainsKey("agent-1"));
        Assert.Single(result["agent-1"]);
        Assert.Equal("What is the weather?", result["agent-1"][0].Query);
        Assert.Equal("It's sunny.", result["agent-1"][0].Response);
        Assert.Equal(2, result["agent-1"][0].Conversation.Count);
    }

    [Fact]
    public void ExtractAgentData_UnmatchedInvocation_NotIncluded()
    {
        // An invocation without a matching completion should not appear in results
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "Hello"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Empty(result);
    }

    [Fact]
    public void ExtractAgentData_CompletionWithoutInvocation_NotIncluded()
    {
        // A completion without a prior invocation should not appear in results
        var events = new List<WorkflowEvent>
        {
            new ExecutorCompletedEvent("agent-1", "Response"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Empty(result);
    }

    [Fact]
    public void ExtractAgentData_MultipleAgents_SeparatedByExecutorId()
    {
        // Invocations and completions are interleaved across two executors
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "Q1"),
            new ExecutorInvokedEvent("agent-2", "Q2"),
            new ExecutorCompletedEvent("agent-1", "A1"),
            new ExecutorCompletedEvent("agent-2", "A2"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Equal(2, result.Count);
        Assert.Equal("Q1", result["agent-1"][0].Query);
        Assert.Equal("A1", result["agent-1"][0].Response);
        Assert.Equal("Q2", result["agent-2"][0].Query);
        Assert.Equal("A2", result["agent-2"][0].Response);
    }

    [Fact]
    public void ExtractAgentData_DuplicateExecutorId_LastInvocationUsed()
    {
        // If the same executor is invoked twice before completing,
        // the second invocation overwrites the first
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "First question"),
            new ExecutorInvokedEvent("agent-1", "Second question"),
            new ExecutorCompletedEvent("agent-1", "Answer"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Single(result["agent-1"]);
        Assert.Equal("Second question", result["agent-1"][0].Query);
    }

    [Fact]
    public void ExtractAgentData_MultipleRoundsForSameExecutor_AllCaptured()
    {
        // Same executor invoked→completed twice (sequential rounds)
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "Q1"),
            new ExecutorCompletedEvent("agent-1", "A1"),
            new ExecutorInvokedEvent("agent-1", "Q2"),
            new ExecutorCompletedEvent("agent-1", "A2"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result); // one executor
        Assert.Equal(2, result["agent-1"].Count); // two items
        Assert.Equal("Q1", result["agent-1"][0].Query);
        Assert.Equal("Q2", result["agent-1"][1].Query);
    }

    [Fact]
    public void ExtractAgentData_NullData_UsesEmptyString()
    {
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", null!),
            new ExecutorCompletedEvent("agent-1", null),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Equal(string.Empty, result["agent-1"][0].Query);
        Assert.Equal(string.Empty, result["agent-1"][0].Response);
    }

    [Fact]
    public void ExtractAgentData_WithSplitter_SetOnItems()
    {
        var splitter = ConversationSplitters.LastTurn;
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "Q"),
            new ExecutorCompletedEvent("agent-1", "A"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter);

        Assert.Equal(splitter, result["agent-1"][0].Splitter);
    }

    [Fact]
    public void ExtractAgentData_ChatMessageData_ExtractsText()
    {
        // When Data is a ChatMessage, the fix should extract .Text instead of type name
        var queryMsg = new ChatMessage(ChatRole.User, "What is the weather?");
        var responseMsg = new ChatMessage(ChatRole.Assistant, "It's sunny.");
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", queryMsg),
            new ExecutorCompletedEvent("agent-1", responseMsg),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Equal("What is the weather?", result["agent-1"][0].Query);
        Assert.Equal("It's sunny.", result["agent-1"][0].Response);
    }

    [Fact]
    public void ExtractAgentData_ChatMessageListData_ExtractsLastUserText()
    {
        // When Data is IReadOnlyList<ChatMessage>, extract last user message text
        IReadOnlyList<ChatMessage> messages = new List<ChatMessage>
        {
            new(ChatRole.User, "First question"),
            new(ChatRole.Assistant, "First answer"),
            new(ChatRole.User, "Follow-up question"),
        };
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", messages),
            new ExecutorCompletedEvent("agent-1", "Response text"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Equal("Follow-up question", result["agent-1"][0].Query);
    }

    [Fact]
    public void ExtractAgentData_AgentResponseData_ExtractsText()
    {
        // When completed Data is an AgentResponse, extract .Text
        var agentResponse = new AgentResponse(new ChatMessage(ChatRole.Assistant, "Agent says hello"));
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "Hi there"),
            new ExecutorCompletedEvent("agent-1", agentResponse),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Equal("Hi there", result["agent-1"][0].Query);
        Assert.Equal("Agent says hello", result["agent-1"][0].Response);
    }

    [Fact]
    public void ExtractAgentData_AgentResponseData_PreservesFullMessages()
    {
        // When completed Data is an AgentResponse, the conversation should include
        // all response messages (tool calls, intermediate, etc.) not just a text summary
        var toolCallMsg = new ChatMessage(
            ChatRole.Assistant,
            [new FunctionCallContent("call_1", "get_weather", new Dictionary<string, object?> { ["city"] = "Seattle" })]);
        var toolResultMsg = new ChatMessage(ChatRole.Tool, [new FunctionResultContent("call_1", "Sunny, 72°F")]);
        var finalMsg = new ChatMessage(ChatRole.Assistant, "It's sunny and 72°F in Seattle.");
        var agentResponse = new AgentResponse
        {
            Messages = [toolCallMsg, toolResultMsg, finalMsg],
        };
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "What's the weather?"),
            new ExecutorCompletedEvent("agent-1", agentResponse),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        // Should have user query + all 3 response messages
        Assert.Equal(4, result["agent-1"][0].Conversation.Count);
        Assert.Equal(ChatRole.User, result["agent-1"][0].Conversation[0].Role);
        Assert.Equal(ChatRole.Assistant, result["agent-1"][0].Conversation[1].Role);
        Assert.Equal(ChatRole.Tool, result["agent-1"][0].Conversation[2].Role);
        Assert.Equal(ChatRole.Assistant, result["agent-1"][0].Conversation[3].Role);
    }

    [Fact]
    public void ExtractAgentData_UnknownObjectData_UsesToString()
    {
        // When Data is an unknown object type, the ToString() fallback should produce
        // the string representation (not a type name for known types)
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", 42),
            new ExecutorCompletedEvent("agent-1", 3.14),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.Equal("42", result["agent-1"][0].Query);

        // NOTE(review): "3.14" assumes the fallback formats doubles with a '.' decimal
        // separator — confirm it is culture-invariant, otherwise this can fail under
        // cultures that use ',' (e.g. de-DE).
        Assert.Equal("3.14", result["agent-1"][0].Response);
    }

    [Fact]
    public void ExtractAgentData_SkipsInternalExecutors()
    {
        // Only "real-agent" is expected to survive; the other four executor IDs
        // are asserted below to be filtered out of the result.
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("_internal", "internal query"),
            new ExecutorCompletedEvent("_internal", "internal response"),
            new ExecutorInvokedEvent("input-conversation", "start"),
            new ExecutorCompletedEvent("input-conversation", "done"),
            new ExecutorInvokedEvent("end-conversation", "end query"),
            new ExecutorCompletedEvent("end-conversation", "end response"),
            new ExecutorInvokedEvent("end", "end query"),
            new ExecutorCompletedEvent("end", "end response"),
            new ExecutorInvokedEvent("real-agent", "real query"),
            new ExecutorCompletedEvent("real-agent", "real response"),
        };

        var result = WorkflowEvaluationExtensions.ExtractAgentData(events, splitter: null);

        Assert.Single(result);
        Assert.True(result.ContainsKey("real-agent"));
        Assert.DoesNotContain("_internal", result.Keys);
        Assert.DoesNotContain("input-conversation", result.Keys);
        Assert.DoesNotContain("end-conversation", result.Keys);
        Assert.DoesNotContain("end", result.Keys);
    }

    // ---------------------------------------------------------------
    // BuildOverallItem tests (expected output / ground truth)
    // ---------------------------------------------------------------

    [Fact]
    public void BuildOverallItem_NoCompletedExecutorWithResponse_ReturnsNull()
    {
        // Arrange — no ExecutorCompletedEvent with usable response data and no AgentResponseEvent
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "query"),
        };

        // Act
        var item = WorkflowEvaluationExtensions.BuildOverallItem(events, splitter: null, expectedOutput: null);

        // Assert
        Assert.Null(item);
    }

    [Fact]
    public void BuildOverallItem_NoAgentResponseEvent_FallsBackToLastExecutorCompleted()
    {
        // Arrange — only ExecutorCompletedEvent (the default when EmitAgentResponseEvents is false)
        var finalResponse = new AgentResponse(new ChatMessage(ChatRole.Assistant, "Paris"));
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("researcher", "What is the capital of France?"),
            new ExecutorCompletedEvent("researcher", new AgentResponse(new ChatMessage(ChatRole.Assistant, "draft"))),
            new ExecutorInvokedEvent("editor", "draft"),
            new ExecutorCompletedEvent("editor", finalResponse),
        };

        // Act
        var item = WorkflowEvaluationExtensions.BuildOverallItem(
            events,
            splitter: null,
            expectedOutput: "Paris");

        // Assert — query comes from the first invocation, response from the last completion
        Assert.NotNull(item);
        Assert.Equal("What is the capital of France?", item.Query);
        Assert.Equal("Paris", item.Response);
        Assert.Equal("Paris", item.ExpectedOutput);
    }

    [Fact]
    public void BuildOverallItem_WithFinalResponseAndExpectedOutput_StampsExpectedOutput()
    {
        // Arrange
        var finalResponse = new AgentResponse(new ChatMessage(ChatRole.Assistant, "Ofrece 41 planes"));
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "How many plans does Netlife offer?"),
            new ExecutorCompletedEvent("agent-1", finalResponse),
            new AgentResponseEvent("agent-1", finalResponse),
        };

        // Act
        var item = WorkflowEvaluationExtensions.BuildOverallItem(
            events,
            splitter: null,
            expectedOutput: "Ofrece 41 planes");

        // Assert
        Assert.NotNull(item);
        Assert.Equal("How many plans does Netlife offer?", item.Query);
        Assert.Equal("Ofrece 41 planes", item.Response);
        Assert.Equal("Ofrece 41 planes", item.ExpectedOutput);
    }

    [Fact]
    public void BuildOverallItem_WithFinalResponseAndNoExpectedOutput_LeavesExpectedOutputNull()
    {
        // Arrange
        var finalResponse = new AgentResponse(new ChatMessage(ChatRole.Assistant, "answer"));
        var events = new List<WorkflowEvent>
        {
            new ExecutorInvokedEvent("agent-1", "query"),
            new ExecutorCompletedEvent("agent-1", finalResponse),
            new AgentResponseEvent("agent-1", finalResponse),
        };

        // Act
        var item = WorkflowEvaluationExtensions.BuildOverallItem(events, splitter: null, expectedOutput: null);

        // Assert
        Assert.NotNull(item);
        Assert.Null(item.ExpectedOutput);
    }

    [Fact]
    public async Task EvaluateAsync_WithIncludeOverallButNoFinalResponse_ThrowsAsync()
    {
        // Arrange — build a workflow whose AIAgentHostExecutor is NOT bound with
        // EmitAgentResponseEvents=true, so no AgentResponseEvent is emitted, and the
        // ExecutorCompletedEvent for the host carries null Data. That is the scenario
        // where BuildOverallItem returns null. When the caller asks for an overall
        // evaluation (includeOverall: true), we should fail fast rather than silently
        // returning empty results — regardless of whether expectedOutput was supplied.
        var agent = new TestEchoAgent(name: "echo");
        var workflow = AgentWorkflowBuilder.BuildSequential(agent);
        var input = new List<ChatMessage> { new(ChatRole.User, "Hello") };
        var evaluator = new LocalEvaluator(
            FunctionEvaluator.Create("noop", (EvalItem _) => true));

        await using var run = await InProcessExecution.RunAsync(workflow, input);

        // Act + Assert — throws even without expectedOutput
        // NOTE(review): the exception type is assumed to be InvalidOperationException —
        // confirm against what EvaluateAsync actually throws.
        var ex = await Assert.ThrowsAsync<InvalidOperationException>(() => run.EvaluateAsync(
            evaluator,
            includeOverall: true,
            includePerAgent: false));

        Assert.Contains("EmitAgentResponseEvents", ex.Message);
    }

    // ---------------------------------------------------------------
    // EvaluateAsync integration test
    // ---------------------------------------------------------------

    [Fact]
    public async Task EvaluateAsync_WithSequentialWorkflow_ReturnsPerAgentSubResultsAsync()
    {
        // Arrange: two agents in a sequential workflow
        var agent1 = new TestEchoAgent(name: "agent-one");
        var agent2 = new TestEchoAgent(name: "agent-two");
        var workflow = AgentWorkflowBuilder.BuildSequential(agent1, agent2);
        var input = new List<ChatMessage> { new(ChatRole.User, "Hello world") };
        var evaluator = new LocalEvaluator(
            FunctionEvaluator.Create("has_content", (EvalItem item) => item.Conversation.Count > 0));

        // Act
        await using var run = await InProcessExecution.RunAsync(workflow, input);
        var results = await run.EvaluateAsync(evaluator, includeOverall: false, includePerAgent: true);

        // Assert — results returned
        Assert.NotNull(results);

        // Assert — per-agent sub-results are populated
        Assert.NotNull(results.SubResults);
        Assert.True(
            results.SubResults.Count >= 2,
            $"Expected at least 2 agent sub-results, got {results.SubResults.Count}");

        // Each sub-result should have evaluated items
        foreach (var (agentId, subResult) in results.SubResults)
        {
            Assert.True(subResult.Total > 0, $"Agent '{agentId}' should have at least one evaluated item");
        }
    }
}