diff --git a/dotnet/agent-framework-dotnet.slnx b/dotnet/agent-framework-dotnet.slnx index 6afa318012..5c846f0def 100644 --- a/dotnet/agent-framework-dotnet.slnx +++ b/dotnet/agent-framework-dotnet.slnx @@ -129,6 +129,7 @@ + diff --git a/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Harness_Step05_Loop.csproj b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Harness_Step05_Loop.csproj new file mode 100644 index 0000000000..f5d6f368b6 --- /dev/null +++ b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Harness_Step05_Loop.csproj @@ -0,0 +1,21 @@ + + + + Exe + net10.0 + + enable + enable + + + + + + + + + + + + + diff --git a/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Program.cs b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Program.cs new file mode 100644 index 0000000000..1e19dabc22 --- /dev/null +++ b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/Program.cs @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft. All rights reserved. + +// This sample demonstrates how to wrap a HarnessAgent with the LoopAgent decorator to re-invoke +// the agent until a configured LoopEvaluator decides to stop. It covers the common looping patterns +// through one decorator, each driven by a different evaluator: +// +// 1. Completion-marker (Ralph-style) loop — keep refining until the agent emits a completion +// marker, restarting each pass from a fresh context (CompletionMarkerLoopEvaluator + +// FreshContextPerIteration). +// 2. Delegate predicate (todos remaining) — loop while the built-in TodoProvider still has open +// items (DelegateLoopEvaluator). +// 3. AI judge — a second chat client decides whether the original request was answered, and the +// loop continues while the answer is "no" (AIJudgeLoopEvaluator). +// 4. Approval heuristics + loop — combine the LoopAgent with the ToolApprovalAgent auto-approval +// heuristics so a looped agent auto-approves tool calls instead of stalling on approval. 
+//
+// The demos run sequentially and print each loop's final response.
+
+#pragma warning disable OPENAI001 // Suppress experimental API warnings for Responses API usage.
+#pragma warning disable MAAI001 // Suppress experimental API warnings for Agents AI experiments.
+
+using System.ClientModel.Primitives;
+using System.ComponentModel;
+using Azure.AI.Projects;
+using Azure.Identity;
+using Microsoft.Agents.AI;
+using Microsoft.Extensions.AI;
+
+var endpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT") ?? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set.");
+var deploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME") ?? "gpt-5.4";
+
+// The HarnessAgent pre-configures function invocation, per-service-call chat history persistence, and
+// context-window compaction. These bounds size the in-loop compaction window.
+const int MaxContextWindowTokens = 1_050_000;
+const int MaxOutputTokens = 32_000;
+
+// Build a single Foundry-backed IChatClient factory shared by every demo. Each call returns a fresh
+// IChatClient over the same Responses endpoint.
+var projectClient = new AIProjectClient(
+    new Uri(endpoint),
+    // WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
+    // In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
+    // latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
+    new DefaultAzureCredential(),
+    new AIProjectClientOptions { RetryPolicy = new ClientRetryPolicy(3) });
+
+IChatClient CreateChatClient() =>
+    projectClient.GetProjectOpenAIClient().GetResponsesClient().AsIChatClient(deploymentName);
+
+await RalphLoopAsync();
+await TodoLoopAsync();
+await JudgeLoopAsync();
+await ApprovalLoopAsync();
+
+// Pattern 1: a "Ralph"-style loop that refines until the agent signals completion.
+async Task RalphLoopAsync()
+{
+    Console.WriteLine("\n=== 1. Completion-marker (Ralph-style) loop — refine until COMPLETE (max 5) ===");
+
+    // Build a lean HarnessAgent: no todo or mode providers for this iterative-refinement task.
+    AIAgent harnessAgent = CreateLeanHarnessAgent(
+        name: "ralph",
+        instructions:
+            """
+            You are iteratively refining a product name for a note-taking app. Each turn, build on the
+            feedback so far: propose an improved candidate with a short reason. When you are confident the
+            name is final, end your message with the exact marker COMPLETE.
+            """);
+
+    // CompletionMarkerLoopEvaluator stops once the marker appears in the response; until then it
+    // re-invokes the agent. FreshContextPerIteration restarts each pass from the original task plus the
+    // aggregated feedback log on a brand-new session. Because each pass starts fresh, the agent has no
+    // memory of its prior suggestion — so the feedback template includes the {last_response} placeholder
+    // to echo the previous candidate back to it.
+    AIAgent loopAgent = new LoopAgent(
+        harnessAgent,
+        new CompletionMarkerLoopEvaluator("COMPLETE", options: new()
+        {
+            FeedbackMessageTemplate =
+                "Your previous suggestion was:\n" + CompletionMarkerLoopEvaluator.LastResponsePlaceholder +
+                "\n\nContinue to refine the name and remember to reply with " +
+                CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when happy.",
+        }),
+        new LoopAgentOptions { MaxIterations = 5, FreshContextPerIteration = true });
+
+    AgentResponse response = await StreamLoopAsync(loopAgent, "Suggest a name for a note-taking app.");
+    Console.WriteLine($"\nFinal response:\n{response.Text}");
+}
+
+// Pattern 2: loop while the built-in TodoProvider still has open items.
+async Task TodoLoopAsync()
+{
+    Console.WriteLine("\n=== 2. Delegate predicate — loop while todos remain (max 6) ===");
+
+    // Keep the built-in TodoProvider enabled (only the mode provider is disabled) so the agent has
+    // todo tools to plan and track work.
+    AIAgent harnessAgent = CreateLeanHarnessAgent(
+        name: "planner",
+        instructions:
+            """
+            You are a planning assistant. First break the task into todo items using your todo tools.
+            Then, on each turn, make progress and mark completed items as done. When all items are
+            complete, summarize the result.
+            """,
+        disableTodoProvider: false);
+
+    // The predicate re-invokes the agent while any todo item is still open. The evaluator fetches the
+    // built-in TodoProvider from context.Agent (via GetService, which forwards through the harness
+    // decorators to the underlying ChatClientAgent's context providers), keeping the delegate
+    // self-contained, then queries it against the loop's current session. When items remain, it returns
+    // feedback telling the agent to finish them. MaxIterations guarantees the loop stops even if the
+    // agent stalls.
+    AIAgent loopAgent = new LoopAgent(
+        harnessAgent,
+        new DelegateLoopEvaluator(async (context, cancellationToken) =>
+        {
+            // The type argument selects which service to resolve; without it the call cannot compile.
+            var todoProvider = context.Agent.GetService<TodoProvider>()
+                ?? throw new InvalidOperationException("The agent did not expose a TodoProvider.");
+            var remaining = await todoProvider.GetRemainingTodosAsync(context.Session).ConfigureAwait(false);
+            return remaining.Count > 0
+                ? LoopEvaluation.Continue($"Not all todos are complete yet ({remaining.Count} remaining). Please complete the remaining todo items.")
+                : LoopEvaluation.Stop();
+        }),
+        new LoopAgentOptions { MaxIterations = 6 });
+
+    // The LoopAgent creates a single session up front and reuses it across iterations (non-fresh
+    // mode), so the todo state persists; the predicate reads it via context.Session.
+    AgentResponse response = await StreamLoopAsync(
+        loopAgent,
+        "Plan and outline a 3-section blog post about Rayleigh scattering.");
+    Console.WriteLine($"\nFinal response:\n{response.Text}");
+}
+
+// Pattern 3: a second chat client judges whether the original request was answered.
+async Task JudgeLoopAsync()
+{
+    Console.WriteLine("\n=== 3. AI judge — loop until the request is answered (max 4) ===");
+
+    AIAgent harnessAgent = CreateLeanHarnessAgent(
+        name: "answerer",
+        instructions: "You are a helpful assistant. Answer the user's question thoroughly.");
+
+    // The judge uses its own IChatClient. AIJudgeLoopEvaluator asks it (via a JudgeVerdict structured
+    // output) whether the original request has been fully addressed and continues while the answer is
+    // "no", injecting the judge's gap analysis as the next iteration's input. Judge loops use a small
+    // MaxIterations cap because each pass costs an extra model call.
+    AIAgent loopAgent = new LoopAgent(
+        harnessAgent,
+        new AIJudgeLoopEvaluator(CreateChatClient()),
+        new LoopAgentOptions { MaxIterations = 4 });
+
+    AgentResponse response = await StreamLoopAsync(
+        loopAgent,
+        "Explain why the sky is blue, then also explain why sunsets are red.");
+    Console.WriteLine($"\nFinal response:\n{response.Text}");
+}
+
+// Pattern 4: combine the loop with the ToolApprovalAgent auto-approval heuristics.
+async Task ApprovalLoopAsync()
+{
+    Console.WriteLine("\n=== 4. Approval heuristics + loop — auto-approve tool calls in the loop (max 2) ===");
+
+    var deployTool = new ApprovalRequiredAIFunction(
+        AIFunctionFactory.Create(DeploymentTools.DeployService));
+
+    // Configure the HarnessAgent's built-in ToolApprovalAgent with an auto-approval rule. The rule
+    // approves the DeployService call without prompting, so the inner agent resolves the approval
+    // internally and never surfaces a pending approval to the LoopAgent — letting the loop proceed.
+    AIAgent harnessAgent = CreateLeanHarnessAgent(
+        name: "operator",
+        instructions: "You are a deployment operator. Use the DeployService tool to fulfil requests.",
+        tools: [deployTool],
+        toolApprovalAgentOptions: new ToolApprovalAgentOptions
+        {
+            AutoApprovalRules =
+            [
+                functionCall =>
+                {
+                    Console.WriteLine($" Auto-approving: {functionCall.Name}");
+                    return ValueTask.FromResult(true);
+                },
+            ],
+        });
+
+    // Drive a short loop that continues until the response confirms the deployment.
+    AIAgent loopAgent = new LoopAgent(
+        harnessAgent,
+        new DelegateLoopEvaluator((context, _) =>
+            new ValueTask<LoopEvaluation>(
+                context.LastResponse.Text.Contains("deployed", StringComparison.OrdinalIgnoreCase)
+                    ? LoopEvaluation.Stop()
+                    : LoopEvaluation.Continue())),
+        new LoopAgentOptions { MaxIterations = 2 });
+
+    // The LoopAgent reuses a single session across iterations, so the approval response flows back in.
+    AgentResponse response = await StreamLoopAsync(loopAgent, "Deploy the billing service.");
+    Console.WriteLine($"\nFinal response:\n{response.Text}");
+}
+
+// Streams a loop run to the console, printing updates live and marking each new inner run (detected
+// via a change in ResponseId) with an "--- run N ---" header so you can see when the LoopAgent
+// re-invokes the inner agent. Each message is prefixed with "User:" or "Agent:" based on its role, so
+// the loop's on-behalf-of feedback (User) is visually distinct from the agent's responses (Agent).
+// Returns the aggregated final response.
+static async Task<AgentResponse> StreamLoopAsync(AIAgent loopAgent, string input, AgentSession? session = null)
+{
+    string? currentResponseId = null;
+    ChatRole? currentRole = null;
+    var runCount = 0;
+    var updates = new List<AgentResponseUpdate>();
+
+    await foreach (var update in loopAgent.RunStreamingAsync(input, session))
+    {
+        // A new ResponseId signals the start of another inner run (loop iteration).
+        if (update.ResponseId is { } responseId && responseId != currentResponseId)
+        {
+            currentResponseId = responseId;
+            currentRole = null;
+            Console.WriteLine($"\n--- run {++runCount} ---");
+        }
+
+        // Print a role-based prefix whenever the speaker changes — for example the loop's on-behalf-of
+        // user feedback versus the agent's response.
+        if (update.Role is { } role && role != currentRole)
+        {
+            currentRole = role;
+            var prefix = role == ChatRole.User ? "User" : role == ChatRole.Assistant ? "Agent" : role.Value;
+            Console.Write($"\n{prefix}: ");
+        }
+
+        Console.Write(update.Text);
+        updates.Add(update);
+    }
+
+    Console.WriteLine();
+    return updates.ToAgentResponse();
+}
+
+// Creates a HarnessAgent with the agent-mode provider always disabled (and the todo provider disabled
+// by default), plus all other heavyweight providers turned off so each loop demo stays focused.
+AIAgent CreateLeanHarnessAgent(
+    string name,
+    string instructions,
+    bool disableTodoProvider = true,
+    IList<AITool>? tools = null,
+    ToolApprovalAgentOptions? toolApprovalAgentOptions = null) =>
+    CreateChatClient().AsHarnessAgent(new HarnessAgentOptions
+    {
+        Name = name,
+        MaxContextWindowTokens = MaxContextWindowTokens,
+        MaxOutputTokens = MaxOutputTokens,
+        DisableAgentModeProvider = true,
+        DisableTodoProvider = disableTodoProvider,
+        DisableFileMemory = true,
+        DisableFileAccess = true,
+        DisableWebSearch = true,
+        ToolApprovalAgentOptions = toolApprovalAgentOptions,
+        ChatOptions = new ChatOptions
+        {
+            Instructions = instructions,
+            Tools = tools,
+            MaxOutputTokens = MaxOutputTokens,
+        },
+    });
+
+/// <summary>Tool used by the approval-handling demo.</summary>
+internal static class DeploymentTools
+{
+    /// <summary>Returns a confirmation message for the requested service; performs no real deployment.</summary>
+    /// <param name="service">The name of the service to deploy.</param>
+    /// <returns>The text <c>Deployed {service} to production.</c> with the service name substituted.</returns>
+    [Description("Deploy a service to production (requires approval).")]
+    public static string DeployService([Description("The name of the service to deploy.")] string service) =>
+        $"Deployed {service} to production.";
+}
diff --git a/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/README.md b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/README.md
new file mode 100644
index 0000000000..4a19f66e57
--- /dev/null
+++ b/dotnet/samples/02-agents/Harness/Harness_Step05_Loop/README.md
@@ -0,0 +1,59 @@
+# What this sample demonstrates
+
+This sample demonstrates how to wrap a `HarnessAgent` with the **`LoopAgent`** decorator to re-invoke the agent until a configured **`LoopEvaluator`** decides to stop. A single decorator covers the common looping patterns — you just plug in a different evaluator (and optionally switch on fresh-context mode).
+
+The `HarnessAgent` pre-configures function invocation, per-service-call chat history persistence, and in-loop compaction, so each demo only supplies the chat client, token limits, and instructions, then wraps the result with a `LoopAgent`.
+
+## Looping patterns showcased
+
+The program runs four demos sequentially, each driven by a different evaluator:
+
+| # | Pattern | Evaluator | Notes |
+| --- | --- | --- | --- |
+| 1 | Completion-marker ("Ralph"-style) loop | `CompletionMarkerLoopEvaluator` | Re-invokes until the agent emits `COMPLETE`. Uses `FreshContextPerIteration = true` to restart each pass from the original task plus the aggregated feedback log on a new session, and includes the `{last_response}` placeholder in the feedback template so the agent sees its previous suggestion even though each pass starts fresh. |
+| 2 | Delegate predicate (todos remaining) | `DelegateLoopEvaluator` | Loops while the built-in `TodoProvider` still has open items. The provider is fetched from the agent via `GetService<TodoProvider>()` and queried against the loop's current session. 
| +| 3 | AI judge | `AIJudgeLoopEvaluator` | A second `IChatClient` judges whether the original request was fully answered and continues while the answer is "no", injecting its gap analysis as the next input. | +| 4 | Approval heuristics + loop | `DelegateLoopEvaluator` + `ToolApprovalAgent` | Combines the `ToolApprovalAgent` auto-approval heuristics (`AutoApprovalRules`) with the loop, so a looped agent auto-approves tool calls instead of stalling on a pending approval. | + +`MaxIterations` caps every loop so it always terminates even if the evaluator never stops. + +### Evaluator mapping (Python → .NET) + +The Python sample in [microsoft/agent-framework#6174](https://github.com/microsoft/agent-framework/pull/6174) exposes several distinct loop classes. In .NET these collapse into one `LoopAgent` that consumes evaluators: + +| Python | .NET | +| --- | --- | +| Ralph loop (completion marker) | `LoopAgent` + `CompletionMarkerLoopEvaluator` | +| Ralph loop (fresh context each pass) | `LoopAgent` + `CompletionMarkerLoopEvaluator` + `FreshContextPerIteration = true` | +| Callable / predicate loop | `LoopAgent` + `DelegateLoopEvaluator` | +| AI judge loop | `LoopAgent` + `AIJudgeLoopEvaluator` | + +## Prerequisites + +Before running this sample, ensure you have: + +1. An Azure AI Foundry project with a deployed model (e.g., `gpt-5.4`) +2. Azure CLI installed and authenticated (`az login`) + +## Environment Variables + +Set the following environment variables: + +```bash +# Required: Your Azure AI Foundry project endpoint +export AZURE_AI_PROJECT_ENDPOINT="https://your-project.services.ai.azure.com/api/projects/your-project" + +# Optional: Model deployment name (defaults to gpt-5.4) +export AZURE_AI_MODEL_DEPLOYMENT_NAME="gpt-5.4" +``` + +## Running the Sample + +```bash +cd dotnet +dotnet run --project samples/02-agents/Harness/Harness_Step05_Loop +``` + +## What to Expect + +The program runs the four demos in order. 
Each loop is executed with `RunStreamingAsync`, so output is printed live and every re-invocation of the inner agent is marked with a `--- run N ---` header (detected via a change in the streamed `ResponseId`) — this lets you see exactly when the `LoopAgent` loops. Each streamed message is prefixed with `User:` or `Agent:` based on its role, so the loop's on-behalf-of feedback messages (surfaced as `User` turns) are visually distinct from the agent's responses (`Agent`). Each demo finishes by printing its aggregated final response. Demo 4 also prints an `Auto-approving: ...` line each time the `ToolApprovalAgent` heuristic approves the `DeployService` tool call, showing how approval-aware agents integrate with the loop. diff --git a/dotnet/samples/02-agents/Harness/README.md b/dotnet/samples/02-agents/Harness/README.md index 16fad9ac62..61981827c4 100644 --- a/dotnet/samples/02-agents/Harness/README.md +++ b/dotnet/samples/02-agents/Harness/README.md @@ -9,3 +9,4 @@ Samples demonstrating the [Harness AIContextProviders](../../../src/Microsoft.Ag | [Harness_Step01_Research](./Harness_Step01_Research/README.md) | Using a ChatClientAgent with TodoProvider and AgentModeProvider for research, showcasing planning mode and todo management | | [Harness_Step02_Research_WithBackgroundAgents](./Harness_Step02_Research_WithBackgroundAgents/README.md) | Using BackgroundAgentsProvider to delegate stock price lookups to a web-search background agent concurrently | | [Harness_Step03_DataProcessing](./Harness_Step03_DataProcessing/README.md) | Using FileAccessProvider to give an agent access to CSV data files for reading, analysis, and output generation | +| [Harness_Step05_Loop](./Harness_Step05_Loop/README.md) | Wrapping a HarnessAgent with the LoopAgent decorator to re-invoke it until a configured LoopEvaluator (completion marker, predicate, AI judge, or approval-aware loop) decides to stop | diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluator.cs 
b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluator.cs
new file mode 100644
index 0000000000..b482d6c93e
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluator.cs
@@ -0,0 +1,201 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Microsoft.Shared.DiagnosticIds;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// A <see cref="LoopEvaluator"/> that uses a separate judge chat client to decide whether the user's original request
+/// has been fully addressed, continuing the loop (with the judge's gap analysis as feedback) while the answer is "no".
+/// </summary>
+/// <remarks>
+/// <para>
+/// After each iteration the judge is queried directly (without any agent tools, session, or middleware) with the
+/// original request and the agent's latest response, and asked for a structured <see cref="JudgeVerdict"/>. If the
+/// judge client does not honor structured output, the verdict falls back to parsing the raw text for the
+/// non-overlapping <see cref="DoneVerdictMarker"/> / <see cref="MoreVerdictMarker"/> markers (with
+/// <see cref="MoreVerdictMarker"/> winning, so the loop keeps running, when the verdict is ambiguous or absent).
+/// </para>
+/// <para>
+/// When the request is not yet answered, the evaluator returns feedback built from
+/// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/> with the judge's gap analysis substituted for
+/// <see cref="GapAnalysisPlaceholder"/>. How that feedback is delivered to the agent (and whether the session is
+/// reset) is decided by the <see cref="LoopAgent"/> that consumes this evaluator.
+/// </para>
+/// <para>
+/// The judge instructions act as a template: any occurrence of <see cref="CriteriaPlaceholder"/> is replaced with the
+/// rendered <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when no criteria are supplied), letting
+/// callers add bespoke standards the response must satisfy.
+/// </para>
+/// <para>
+/// LLM-judged loops are costly and probabilistic, so consider setting a stricter
+/// <see cref="LoopAgentOptions.MaxIterations"/> on the owning <see cref="LoopAgent"/>.
+/// </para>
+/// </remarks>
+[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
+public sealed class AIJudgeLoopEvaluator : LoopEvaluator
+{
+    /// <summary>The default system instructions used to prompt the judge.</summary>
+    /// <remarks>
+    /// Acts as a template: the trailing <see cref="CriteriaPlaceholder"/> is replaced with the rendered
+    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when none are supplied).
+    /// </remarks>
+    public const string DefaultInstructions =
+        "You are an evaluator. You are given a user's original request and an agent's latest response. " +
+        "Decide whether the agent has fully addressed the original request. " +
+        "Set 'answered' to true if the request has been fully addressed, or false if more work is still required. " +
+        "When 'answered' is false, use 'gapAnalysis' to explain what is still missing or what work remains. " +
+        "If you cannot return structured output, reply with " + DoneVerdictMarker + " when the request has been fully " +
+        "addressed, or " + MoreVerdictMarker + " when more work is still required." +
+        CriteriaPlaceholder;
+
+    /// <summary>
+    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when the
+    /// original request has been fully addressed.
+    /// </summary>
+    /// <remarks>
+    /// <see cref="DoneVerdictMarker"/> and <see cref="MoreVerdictMarker"/> are deliberately non-overlapping (neither is
+    /// a substring of the other), so the text fallback cannot misclassify one verdict as the other. When the marker is
+    /// ambiguous or absent, <see cref="MoreVerdictMarker"/> wins so the loop keeps running rather than stopping on an
+    /// incomplete answer.
+    /// </remarks>
+    public const string DoneVerdictMarker = "VERDICT: DONE";
+
+    /// <summary>
+    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when more work
+    /// is still required. Takes precedence over <see cref="DoneVerdictMarker"/> when both (or neither) are present.
+    /// </summary>
+    public const string MoreVerdictMarker = "VERDICT: MORE";
+
+    /// <summary>
+    /// The placeholder token within <see cref="DefaultInstructions"/> (or a custom
+    /// <see cref="AIJudgeLoopEvaluatorOptions.Instructions"/>) that is replaced with the rendered
+    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/>. When no criteria are supplied, the placeholder is removed.
+    /// </summary>
+    public const string CriteriaPlaceholder = "{criteria}";
+
+    /// <summary>
+    /// The placeholder token within <see cref="DefaultFeedbackMessageTemplate"/> (or a custom
+    /// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that is replaced with the judge's gap analysis.
+    /// </summary>
+    public const string GapAnalysisPlaceholder = "{gap_analysis}";
+
+    /// <summary>The default template used to build the feedback produced when the request is not yet answered.</summary>
+    public const string DefaultFeedbackMessageTemplate =
+        "Your previous response did not fully address the original request. " +
+        "The following is still missing or incomplete: " + GapAnalysisPlaceholder + " " +
+        "Please continue and fully address the original request.";
+
+    /// <summary>The value substituted for the gap analysis when the judge did not provide one.</summary>
+    // NOTE(review): this literal is empty here, while the options documentation describes "a placeholder when none
+    // is available" — confirm the intended value.
+    private const string UnknownGapAnalysis = "";
+
+    private readonly IChatClient _judgeClient;
+    private readonly string _instructions;
+    private readonly string _feedbackMessageTemplate;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="AIJudgeLoopEvaluator"/> class.
+    /// </summary>
+    /// <param name="judgeClient">The chat client used to judge whether the original request was answered.</param>
+    /// <param name="options">Optional configuration for the judge. When <see langword="null"/>, defaults are used.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="judgeClient"/> is <see langword="null"/>.</exception>
+    public AIJudgeLoopEvaluator(IChatClient judgeClient, AIJudgeLoopEvaluatorOptions? options = null)
+    {
+        this._judgeClient = Throw.IfNull(judgeClient);
+
+        // The criteria never change after construction, so they are rendered into the instructions once here.
+        this._instructions = (options?.Instructions ?? DefaultInstructions)
+            .Replace(CriteriaPlaceholder, RenderCriteria(options?.Criteria));
+        this._feedbackMessageTemplate = options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate;
+    }
+
+    /// <inheritdoc />
+    public override async ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
+    {
+        _ = Throw.IfNull(context);
+
+        // Build the judge's user message from AIContent so non-text request content (images, data, etc.) is
+        // preserved rather than flattened to text. The original request's contents are framed between header
+        // text segments, followed by the agent's latest response text.
+        var userContents = new List<AIContent>
+        {
+            new TextContent("# Has the original request been fully addressed?\n\n## Original request:\n"),
+        };
+        foreach (ChatMessage message in context.InitialMessages)
+        {
+            userContents.AddRange(message.Contents);
+        }
+
+        userContents.Add(new TextContent($"\n\n## Agent's latest response:\n{context.LastResponse.Text}"));
+
+        List<ChatMessage> judgeMessages =
+        [
+            new ChatMessage(ChatRole.System, this._instructions),
+            new ChatMessage(ChatRole.User, userContents),
+        ];
+
+        bool answered;
+        string gapAnalysis = UnknownGapAnalysis;
+        ChatResponse<JudgeVerdict> response = await this._judgeClient
+            .GetResponseAsync<JudgeVerdict>(judgeMessages, LoopJsonContext.Default.Options, cancellationToken: cancellationToken)
+            .ConfigureAwait(false);
+
+        if (response.TryGetResult(out JudgeVerdict? verdict) && verdict is not null)
+        {
+            answered = verdict.Answered;
+            if (!string.IsNullOrWhiteSpace(verdict.GapAnalysis))
+            {
+                gapAnalysis = verdict.GapAnalysis;
+            }
+        }
+        else
+        {
+            // Fallback for clients that do not honor structured output: look for the explicit, non-overlapping verdict
+            // markers. MoreVerdictMarker wins so an ambiguous or marker-less reply keeps looping rather than stopping
+            // on an incomplete answer. The search is case-insensitive without allocating an upper-cased copy.
+            string text = response.Text;
+            answered = text.IndexOf(MoreVerdictMarker, StringComparison.OrdinalIgnoreCase) < 0
+                && text.IndexOf(DoneVerdictMarker, StringComparison.OrdinalIgnoreCase) >= 0;
+        }
+
+        // The request is answered: stop looping.
+        if (answered)
+        {
+            return LoopEvaluation.Stop();
+        }
+
+        // Not yet answered: continue, providing feedback describing what is still missing.
+        string feedback = this._feedbackMessageTemplate.Replace(GapAnalysisPlaceholder, gapAnalysis);
+        return LoopEvaluation.Continue(feedback);
+    }
+
+    /// <summary>
+    /// Renders the supplied <paramref name="criteria"/> into a bullet block appended at <see cref="CriteriaPlaceholder"/>,
+    /// or an empty string when no non-blank criteria are supplied.
+    /// </summary>
+    private static string RenderCriteria(IEnumerable<string>? criteria)
+    {
+        if (criteria is null)
+        {
+            return string.Empty;
+        }
+
+        var builder = new StringBuilder();
+        foreach (string criterion in criteria)
+        {
+            // Blank entries are skipped so they do not produce empty bullets.
+            if (!string.IsNullOrWhiteSpace(criterion))
+            {
+                builder.Append("\n- ").Append(criterion);
+            }
+        }
+
+        return builder.Length == 0
+            ? string.Empty
+            : "\n\nThe response must satisfy all of the following criteria:" + builder;
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluatorOptions.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluatorOptions.cs
new file mode 100644
index 0000000000..73285a924c
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/AIJudgeLoopEvaluatorOptions.cs
@@ -0,0 +1,48 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Shared.DiagnosticIds;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Provides configuration options for <see cref="AIJudgeLoopEvaluator"/>.
+/// </summary>
+[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
+public sealed class AIJudgeLoopEvaluatorOptions
+{
+    /// <summary>
+    /// Gets or sets the system instructions used to prompt the judge, or <see langword="null"/> to use
+    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>.
+    /// </summary>
+    /// <remarks>
+    /// Any occurrence of <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> in the instructions is replaced with
+    /// the rendered <see cref="Criteria"/> (or removed when no criteria are supplied). Instructions that omit the
+    /// placeholder do not receive the criteria.
+    /// </remarks>
+    public string? Instructions { get; set; }
+
+    /// <summary>
+    /// Gets or sets an optional list of additional criteria the agent's response must satisfy, evaluated by the judge
+    /// alongside the original request.
+    /// </summary>
+    /// <remarks>
+    /// When supplied, the criteria are rendered into the judge instructions wherever
+    /// <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> appears (including in
+    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>). When <see langword="null"/> or empty, the placeholder is
+    /// removed and no criteria are added.
+    /// </remarks>
+    public IEnumerable<string>? 
Criteria { get; set; }
+
+    /// <summary>
+    /// Gets or sets the template used to build the feedback produced when the judge decides the original request was
+    /// not fully addressed, or <see langword="null"/> to use
+    /// <see cref="AIJudgeLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
+    /// </summary>
+    /// <remarks>
+    /// Any occurrence of <see cref="AIJudgeLoopEvaluator.GapAnalysisPlaceholder"/> in the template is replaced with the
+    /// judge's gap analysis (or a placeholder when none is available).
+    /// </remarks>
+    public string? FeedbackMessageTemplate { get; set; }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluator.cs
new file mode 100644
index 0000000000..cd2d7c8aa6
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluator.cs
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Shared.DiagnosticIds;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// A <see cref="LoopEvaluator"/> that stops the loop once a configured marker string appears in the agent's latest
+/// response, and otherwise continues with feedback asking the agent to keep working and to emit the marker when done.
+/// </summary>
+/// <remarks>
+/// The feedback produced while the marker is absent is built from a template (see
+/// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>) with the configured marker substituted
+/// for <see cref="CompletionMarkerPlaceholder"/>, and the agent's latest response text substituted for
+/// <see cref="LastResponsePlaceholder"/>. How that feedback is delivered to the agent (and whether the session
+/// is reset) is decided by the <see cref="LoopAgent"/> that consumes this evaluator.
+/// </remarks>
+[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
+public sealed class CompletionMarkerLoopEvaluator : LoopEvaluator
+{
+    /// <summary>
+    /// The placeholder token within <see cref="DefaultFeedbackMessageTemplate"/> (or a custom
+    /// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that is replaced with the
+    /// configured completion marker.
+    /// </summary>
+    public const string CompletionMarkerPlaceholder = "{completion_marker}";
+
+    /// <summary>
+    /// The placeholder token within a custom <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>
+    /// that is replaced with the text of the agent's latest response. This is substituted on each evaluation, so it lets
+    /// the feedback echo back what the agent previously produced — useful when the consuming
+    /// <see cref="LoopAgent"/> uses <see cref="LoopAgentOptions.FreshContextPerIteration"/>, where the agent would
+    /// otherwise have no record of its prior output.
+    /// </summary>
+    public const string LastResponsePlaceholder = "{last_response}";
+
+    /// <summary>The default template used to build the feedback produced while the completion marker is absent.</summary>
+    public const string DefaultFeedbackMessageTemplate =
+        "Continue working on the request. When you have fully completed the task, end your response with the marker '" +
+        CompletionMarkerPlaceholder + "' to indicate completion.";
+
+    private readonly string _completionMarker;
+    private readonly string _feedbackMessageTemplate;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="CompletionMarkerLoopEvaluator"/> class.
+    /// </summary>
+    /// <param name="completionMarker">The marker string that stops the loop once it appears in the agent's latest response text.</param>
+    /// <param name="options">Optional configuration for the feedback message. When <see langword="null"/>, defaults are used.</param>
+    /// <exception cref="System.ArgumentException"><paramref name="completionMarker"/> is <see langword="null"/>, empty, or whitespace.</exception>
+    public CompletionMarkerLoopEvaluator(string completionMarker, CompletionMarkerLoopEvaluatorOptions? options = null)
+    {
+        this._completionMarker = Throw.IfNullOrWhitespace(completionMarker);
+
+        // The completion marker is fixed, so substitute it once here. The optional {last_response} placeholder depends
+        // on the per-iteration response text, so it is substituted later in EvaluateAsync.
+        this._feedbackMessageTemplate = (options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate)
+            .Replace(CompletionMarkerPlaceholder, this._completionMarker);
+    }
+
+    /// <inheritdoc />
+    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
+    {
+        _ = Throw.IfNull(context);
+
+        // Read the response text once; it is needed for both the marker check and the feedback substitution.
+        string lastResponseText = context.LastResponse.Text;
+
+        // The marker is matched as an exact (case-sensitive) substring of the response.
+        if (lastResponseText.Contains(this._completionMarker))
+        {
+            return new ValueTask<LoopEvaluation>(LoopEvaluation.Stop());
+        }
+
+        string feedback = this._feedbackMessageTemplate.Replace(LastResponsePlaceholder, lastResponseText);
+        return new ValueTask<LoopEvaluation>(LoopEvaluation.Continue(feedback));
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluatorOptions.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluatorOptions.cs
new file mode 100644
index 0000000000..de3c394c48
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/CompletionMarkerLoopEvaluatorOptions.cs
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Shared.DiagnosticIds;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Provides configuration options for <see cref="CompletionMarkerLoopEvaluator"/>.
+/// </summary>
+[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
+public sealed class CompletionMarkerLoopEvaluatorOptions
+{
+    /// <summary>
+    /// Gets or sets the template used to build the feedback produced when the completion marker has not yet appeared,
+    /// or <see langword="null"/> to use <see cref="CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
+    /// </summary>
+    /// <remarks>
+    /// Any occurrence of <see cref="CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder"/> in the template is
+    /// replaced with the configured completion marker. Any occurrence of
+    /// <see cref="CompletionMarkerLoopEvaluator.LastResponsePlaceholder"/> is replaced, on each evaluation, with the
+    /// text of the agent's latest response — useful for echoing the agent's prior output back to it when the consuming
+    /// <see cref="LoopAgent"/> is used with a fresh context per iteration.
+    /// </remarks>
+    public string? 
FeedbackMessageTemplate { get; set; }
}
// diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/DelegateLoopEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/DelegateLoopEvaluator.cs
// new file mode 100644
// index 0000000000..9c41b1a11c
// --- /dev/null
// +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/DelegateLoopEvaluator.cs
// @@ -0,0 +1,40 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Agents.AI;

/// <summary>
/// A <see cref="LoopEvaluator"/> that delegates the re-invocation decision and feedback to a user-supplied callback.
/// </summary>
/// <remarks>
/// This is the most flexible evaluator: the supplied delegate receives the full <see cref="LoopContext"/> and returns
/// a <see cref="LoopEvaluation"/>, so it can decide both whether to continue and what feedback (if any) to provide.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class DelegateLoopEvaluator : LoopEvaluator
{
    // The user callback; validated as non-null by the constructor and invoked once per evaluation.
    private readonly Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> _evaluate;

    /// <summary>
    /// Initializes a new instance of the <see cref="DelegateLoopEvaluator"/> class.
    /// </summary>
    /// <param name="evaluate">A callback that decides whether to re-invoke the agent and what feedback to provide.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluate"/> is <see langword="null"/>.</exception>
    public DelegateLoopEvaluator(Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> evaluate)
    {
        this._evaluate = Throw.IfNull(evaluate);
    }

    /// <summary>
    /// Validates <paramref name="context"/> and forwards the evaluation to the callback supplied at construction.
    /// </summary>
    /// <param name="context">The per-run loop state handed to the callback unchanged.</param>
    /// <param name="cancellationToken">Passed through to the callback.</param>
    /// <returns>The callback's pending result, returned without being awaited here.</returns>
    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(context);
        return this._evaluate(context, cancellationToken);
    }
}
// diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/JudgeVerdict.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/JudgeVerdict.cs
// new file mode 100644
// index 0000000000..19d802e2fc
// --- /dev/null
// +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/JudgeVerdict.cs
// @@ -0,0 +1,26 @@
// Copyright (c) Microsoft. All rights reserved.
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Agents.AI;

/// <summary>
/// Represents the structured verdict returned by the judge chat client used by <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the <see cref="DescriptionAttribute"/> texts below are presumably surfaced to the judge model as
/// field descriptions — confirm against the evaluator that consumes this type.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
internal sealed class JudgeVerdict
{
    /// <summary>
    /// Gets or sets a value indicating whether the agent has fully addressed the user's original request.
    /// </summary>
    [Description("True if the agent has fully addressed the original request, otherwise false.")]
    public bool Answered { get; set; }

    /// <summary>
    /// Gets or sets an explanation of what is still missing when the request has not been fully addressed.
    /// Defaults to the empty string.
    /// </summary>
    [Description("When 'answered' is false, explain what is still missing or what work remains to fully address the original request.")]
    public string GapAnalysis { get; set; } = string.Empty;
}
// diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgent.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgent.cs
// new file mode 100644
// index 0000000000..c92de6a331
// --- /dev/null
// +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgent.cs
// @@ -0,0 +1,548 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Agents.AI;

///
/// A  that re-invokes the wrapped agent in a loop until the configured
///  set decides to stop.
///
///
///
/// After each run of the wrapped agent, the configured evaluators are asked whether to re-invoke the agent and what
/// feedback to carry forward.
This enables patterns such as iterative refinement, working through a task list, or +/// judging whether the original request was answered. Out-of-the-box evaluators include +/// , , and +/// . +/// +/// +/// When multiple evaluators are supplied they are evaluated in order after each iteration. The first evaluator that +/// asks to re-invoke wins: its feedback drives the next iteration and the remaining evaluators are not evaluated. The +/// loop stops only when every evaluator asks to stop. Consequently, evaluator order is priority order and +/// means "this evaluator does not request continuation" rather than a veto that +/// terminates the loop; place stop-only guards accordingly. +/// +/// +/// The caller's initial messages are sent to the wrapped agent exactly once. By default (when +/// is ) the loop reuses a single session +/// and sends only the winning evaluator's feedback as the next input, letting the agent continue from session history. +/// When is , each re-invocation restarts +/// from the original input messages plus an aggregated feedback log, and the session is reset for each iteration: a +/// loop-owned session is created anew, while a caller-supplied session is restored from a snapshot taken at the start +/// of the run (so the wrapped agent must support session serialization). An evaluator may instead supply the exact next +/// messages via , bypassing this construction. +/// +/// +/// The loop is bounded by a global safety cap () regardless of the +/// evaluators. If an iteration produces a pending tool-approval request, the loop stops and returns that response to +/// the caller rather than attempting to resolve the approval automatically. +/// +/// +/// A non-streaming run returns, by default, a single that aggregates the full transcript +/// in order: the on-behalf-of messages the loop injected for each re-invocation followed by that iteration's response +/// messages. The caller's original input messages are not echoed. 
Set +/// to instead return only the final iteration's +/// response. A streaming run always yields every iteration's updates, emitting the injected on-behalf-of messages as +/// updates before each re-invocation. The injected messages can be attributed with +/// , or omitted from the surfaced output entirely with +/// . +/// +/// +[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] +public sealed class LoopAgent : DelegatingAIAgent +{ + /// The default value used for when none is specified. + public const int DefaultMaxIterations = 10; + + private readonly IReadOnlyList _evaluators; + private readonly int _maxIterations; + private readonly bool _freshContextPerIteration; + private readonly string? _onBehalfOfAuthorName; + private readonly bool _excludeOnBehalfOfMessages; + private readonly bool _nonStreamingReturnsLastResponseOnly; + private readonly System.Func? _sessionCreatedCallback; + private readonly ILogger _logger; + + /// + /// Initializes a new instance of the class with a single evaluator. + /// + /// The underlying agent to invoke in a loop. + /// The that decides whether to re-invoke the agent. + /// Optional configuration for the loop. When , defaults are used. + /// Optional factory used to create the loop's logger. + /// or is . + /// is less than 1. + public LoopAgent(AIAgent innerAgent, LoopEvaluator evaluator, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null) + : this(innerAgent, [Throw.IfNull(evaluator)], options, loggerFactory) + { + } + + /// + /// Initializes a new instance of the class with one or more evaluators. + /// + /// The underlying agent to invoke in a loop. + /// + /// The ordered set of that decide whether to re-invoke the agent. They are evaluated in + /// order after each iteration and the first that asks to re-invoke wins. + /// + /// Optional configuration for the loop. When , defaults are used. + /// Optional factory used to create the loop's logger. + /// or is , or contains a element. 
+ /// is empty. + /// is less than 1. + public LoopAgent(AIAgent innerAgent, IEnumerable evaluators, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null) + : base(innerAgent) + { + _ = Throw.IfNull(evaluators); + LoopEvaluator[] evaluatorArray = evaluators.ToArray(); + if (evaluatorArray.Length == 0) + { + throw new System.ArgumentException("At least one evaluator must be supplied.", nameof(evaluators)); + } + + foreach (LoopEvaluator item in evaluatorArray) + { + _ = Throw.IfNull(item, nameof(evaluators)); + } + + this._evaluators = evaluatorArray; + + this._maxIterations = Throw.IfLessThan(options?.MaxIterations ?? DefaultMaxIterations, 1); + this._freshContextPerIteration = options?.FreshContextPerIteration ?? false; + this._onBehalfOfAuthorName = options?.OnBehalfOfAuthorName; + this._excludeOnBehalfOfMessages = options?.ExcludeOnBehalfOfMessages ?? false; + this._nonStreamingReturnsLastResponseOnly = options?.NonStreamingReturnsLastResponseOnly ?? false; + this._sessionCreatedCallback = options?.SessionCreatedCallback; + this._logger = (loggerFactory ?? NullLoggerFactory.Instance).CreateLogger(); + } + + /// + protected override async Task RunCoreAsync( + IEnumerable messages, + AgentSession? session = null, + AgentRunOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(messages); + + // Capture the caller's initial messages (sent once) and ensure the loop always runs against a session. + IReadOnlyList initialMessages = messages as IReadOnlyList ?? 
messages.ToList(); + bool sessionProvidedByCaller = session is not null; + if (session is null) + { + session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false); + await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false); + } + + // When a fresh context is requested over a caller-supplied session, snapshot the pristine session up front so + // each re-invocation can restart from a fresh clone (see CreateFreshIterationSessionAsync). Taken before the + // first iteration mutates the session. + JsonElement? initialSessionSnapshot = this._freshContextPerIteration && sessionProvidedByCaller + ? await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false) + : null; + + LoopContext? context = null; + List feedbackLog = []; + IEnumerable currentMessages = initialMessages; + int iteration = 0; + + // Aggregates the full transcript across iterations: each iteration's surfaced on-behalf-of input messages + // followed by that iteration's response messages. Unused when only the final response is returned. + List transcript = []; + + // The loop-synthesized on-behalf-of messages that drive the current iteration (none for the first iteration). + IReadOnlyList currentSurfaced = []; + + while (true) + { + // Run the wrapped agent using the context's session once it exists (it may have been replaced for a fresh + // context), otherwise the resolved session for the first run. + AgentSession activeSession = context?.Session ?? session; + AgentResponse response = await this.InnerAgent.RunAsync(currentMessages, activeSession, options, cancellationToken).ConfigureAwait(false); + iteration++; + + // Record this iteration's on-behalf-of input (before the response it elicited) and the response itself. 
+ transcript.AddRange(currentSurfaced); + transcript.AddRange(response.Messages); + + // Create the context after the first run (so LastResponse is never null) and reuse it thereafter. + // Expose the feedback log as a read-only wrapper so evaluators cannot downcast and mutate it; the + // wrapper still reflects entries appended by the loop. + context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() }; + + context.Iteration = iteration; + context.LastResponse = response; + + // Stop and surface the response when the agent is waiting for a tool approval. + if (HasPendingApprovalRequests(response)) + { + return this.BuildResult(response, transcript); + } + + // Enforce the global safety cap regardless of what the evaluators want. + if (iteration >= this._maxIterations) + { + this.LogMaxIterationsReached(iteration); + return this.BuildResult(response, transcript); + } + + // Ask the evaluators whether to continue; stop when none of them request a re-invocation. + LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, initialSessionSnapshot, cancellationToken).ConfigureAwait(false); + if (!step.ShouldContinue) + { + return this.BuildResult(response, transcript); + } + + currentMessages = step.Messages; + currentSurfaced = step.SurfacedMessages; + } + } + + /// + protected override async IAsyncEnumerable RunCoreStreamingAsync( + IEnumerable messages, + AgentSession? session = null, + AgentRunOptions? options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(messages); + + // Capture the caller's initial messages (sent once) and ensure the loop always runs against a session. + IReadOnlyList initialMessages = messages as IReadOnlyList ?? 
messages.ToList(); + bool sessionProvidedByCaller = session is not null; + if (session is null) + { + session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false); + await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false); + } + + // When a fresh context is requested over a caller-supplied session, snapshot the pristine session up front so + // each re-invocation can restart from a fresh clone (see CreateFreshIterationSessionAsync). Taken before the + // first iteration mutates the session. + JsonElement? initialSessionSnapshot = this._freshContextPerIteration && sessionProvidedByCaller + ? await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false) + : null; + + LoopContext? context = null; + List feedbackLog = []; + IEnumerable currentMessages = initialMessages; + int iteration = 0; + + // The loop-synthesized on-behalf-of messages that drive the current iteration (none for the first iteration). + IReadOnlyList currentSurfaced = []; + + while (true) + { + // Stream this iteration's updates to the caller while collecting them so the iteration's full + // response can be aggregated for evaluation (true per-iteration streaming). Uses the context's + // session once it exists (it may have been replaced for a fresh context), otherwise the resolved session. + AgentSession activeSession = context?.Session ?? session; + List updates = []; + + // The on-behalf-of messages that drive this iteration are surfaced before the response they elicit (none + // for the first iteration). They are flushed lazily on the first inner update so they can be stamped with + // that update's ResponseId/AgentId, keeping them grouped with the iteration for downstream mergers. 
+ bool surfacedPending = currentSurfaced.Count > 0; + await foreach (var update in this.InnerAgent.RunStreamingAsync(currentMessages, activeSession, options, cancellationToken).ConfigureAwait(false)) + { + if (surfacedPending) + { + foreach (ChatMessage surfaced in currentSurfaced) + { + yield return CreateOnBehalfOfUpdate(surfaced, update.ResponseId); + } + + surfacedPending = false; + } + + updates.Add(update); + yield return update; + } + + // The inner agent produced no updates this iteration; surface the on-behalf-of messages anyway. Since there + // is no iteration response to inherit from, generate a ResponseId so they still group together downstream. + if (surfacedPending) + { + string fallbackResponseId = System.Guid.NewGuid().ToString("N"); + foreach (ChatMessage surfaced in currentSurfaced) + { + yield return CreateOnBehalfOfUpdate(surfaced, fallbackResponseId); + } + } + + // Aggregate this iteration's updates and record the result on the context. + iteration++; + AgentResponse response = updates.ToAgentResponse(); + + // Create the context after the first run (so LastResponse is never null) and reuse it thereafter. + // Expose the feedback log as a read-only wrapper so evaluators cannot downcast and mutate it; the + // wrapper still reflects entries appended by the loop. + context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() }; + + context.Iteration = iteration; + context.LastResponse = response; + + // Stop when the agent is waiting for a tool approval. + if (HasPendingApprovalRequests(response)) + { + yield break; + } + + // Enforce the global safety cap regardless of what the evaluators want. + if (iteration >= this._maxIterations) + { + this.LogMaxIterationsReached(iteration); + yield break; + } + + // Ask the evaluators whether to continue; stop when none of them request a re-invocation. 
+ LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, initialSessionSnapshot, cancellationToken).ConfigureAwait(false); + if (!step.ShouldContinue) + { + yield break; + } + + currentMessages = step.Messages; + currentSurfaced = step.SurfacedMessages; + } + } + + /// + /// Evaluates the evaluators in order and, for the first one that requests a re-invocation, builds the next input + /// according to the loop's feedback and fresh-context policy. + /// + private async ValueTask EvaluateAndBuildNextAsync(LoopContext context, List feedbackLog, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken) + { + // Evaluate in order; the first evaluator that requests a re-invocation wins. + LoopEvaluation? winner = null; + foreach (LoopEvaluator evaluator in this._evaluators) + { + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context, cancellationToken).ConfigureAwait(false); + if (evaluation.ShouldReinvoke) + { + winner = evaluation; + break; + } + } + + // Every evaluator asked to stop. + if (winner is null) + { + return LoopNextStep.Stop(); + } + + // Start the next iteration from a fresh session when a fresh context is requested, so no prior conversation + // history leaks across iterations. This applies regardless of how the next input is built (feedback or explicit + // ContinueWithMessages): a caller-supplied session is cloned from the pristine start-of-run snapshot; a + // loop-owned session is created anew. + if (this._freshContextPerIteration) + { + context.Session = await this.CreateFreshIterationSessionAsync(context, initialSessionSnapshot, cancellationToken).ConfigureAwait(false); + } + + // Record one feedback entry for this re-invoked iteration (null when none, including ContinueWithMessages + // iterations which carry no feedback string) so the log stays aligned: one entry per re-invoked iteration, with + // the last element always corresponding to the latest re-invoked iteration. 
// Continue() normalizes whitespace to null.
        feedbackLog.Add(winner.Feedback);

        // An evaluator supplied explicit messages: send them verbatim, bypassing feedback/message construction (the
        // session is still reset above when a fresh context is requested). These are surfaced to the caller as-is (the
        // evaluator owns them, including any author name).
        if (winner.Messages is not null)
        {
            return LoopNextStep.Continue(winner.Messages, this.Surfaced(winner.Messages));
        }

        (List<ChatMessage> messages, List<ChatMessage> surfaced) = this.BuildNextMessages(context, feedbackLog);
        return LoopNextStep.Continue(messages, this.Surfaced(surfaced));
    }

    /// <summary>
    /// Returns the messages to surface to the caller, honoring <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/>.
    /// </summary>
    /// <param name="surfaced">The candidate on-behalf-of messages for the next iteration.</param>
    /// <returns>An empty list when on-behalf-of messages are excluded; otherwise <paramref name="surfaced"/> itself.</returns>
    private IReadOnlyList<ChatMessage> Surfaced(IReadOnlyList<ChatMessage> surfaced)
        => this._excludeOnBehalfOfMessages ? [] : surfaced;

    /// <summary>
    /// Creates a streaming update for a surfaced on-behalf-of message, inheriting the driven iteration's
    /// <see cref="AgentResponseUpdate.ResponseId"/> so downstream mergers group it with that iteration, and ensuring a unique
    /// non-null <see cref="AgentResponseUpdate.MessageId"/>. The agent id is left
    /// unset because the message is synthesized by the loop, not produced by the wrapped agent.
    /// </summary>
    /// <param name="message">The loop-synthesized (or evaluator-supplied) message to surface.</param>
    /// <param name="responseId">The response id to stamp on the update; may be <see langword="null"/>.</param>
    private static AgentResponseUpdate CreateOnBehalfOfUpdate(ChatMessage message, string? responseId)
        => new(message.Role, message.Contents)
        {
            AuthorName = message.AuthorName,
            // Keep an existing non-empty message id; otherwise mint a fresh one.
            MessageId = message.MessageId is { Length: > 0 } messageId ? messageId : System.Guid.NewGuid().ToString("N"),
            ResponseId = responseId,
        };

    /// <summary>
    /// Builds the messages sent to the wrapped agent for the next iteration along with the subset that should be
    /// surfaced to the caller (the loop-synthesized on-behalf-of feedback). Replayed caller input is excluded from the
    /// surfaced subset.
    /// </summary>
    /// <param name="context">The loop context; its initial messages are replayed in fresh-context mode.</param>
    /// <param name="feedback">The feedback log, one entry per re-invoked iteration (entries may be <see langword="null"/>).</param>
    private (List<ChatMessage> Messages, List<ChatMessage> Surfaced) BuildNextMessages(LoopContext context, List<string?> feedback)
    {
        var messages = new List<ChatMessage>();
        var surfaced = new List<ChatMessage>();

        if (this._freshContextPerIteration)
        {
            // Fresh context: re-send the original task plus an aggregated log of all feedback recorded so far. Only the
            // synthesized feedback message is surfaced; the replayed caller input messages are not.
            messages.AddRange(context.InitialMessages);

            ChatMessage? feedbackMessage = this.BuildAggregatedFeedbackMessage(feedback);
            if (feedbackMessage is not null)
            {
                messages.Add(feedbackMessage);
                surfaced.Add(feedbackMessage);
            }
        }
        else
        {
            // Reused session: send only the latest feedback verbatim (the session already retains earlier turns). When
            // the latest iteration produced no feedback, send no messages and let the agent continue from history.
            string? latest = feedback.Count > 0 ? feedback[feedback.Count - 1] : null;
            if (!string.IsNullOrWhiteSpace(latest))
            {
                var feedbackMessage = new ChatMessage(ChatRole.User, latest) { AuthorName = this._onBehalfOfAuthorName, MessageId = System.Guid.NewGuid().ToString("N") };
                messages.Add(feedbackMessage);
                surfaced.Add(feedbackMessage);
            }
        }

        return (messages, surfaced);
    }

    /// <summary>
    /// Folds every non-blank feedback entry into a single user message formatted as a bulleted list under a
    /// "## Feedback" heading, or returns <see langword="null"/> when no entry carries text.
    /// </summary>
    /// <param name="feedback">The feedback log to aggregate; null/blank entries are skipped.</param>
    private ChatMessage? BuildAggregatedFeedbackMessage(IReadOnlyList<string?> feedback)
    {
        var body = new StringBuilder("## Feedback\n");
        bool any = false;
        foreach (string? entry in feedback)
        {
            if (!string.IsNullOrWhiteSpace(entry))
            {
                body.Append("\n- ").Append(entry);
                any = true;
            }
        }

        return any ? new ChatMessage(ChatRole.User, body.ToString()) { AuthorName = this._onBehalfOfAuthorName, MessageId = System.Guid.NewGuid().ToString("N") } : null;
    }

    /// <summary>
    /// Produces the non-streaming run result: either the final iteration's response (when configured) or an
    /// aggregated response carrying the full transcript with the final response's metadata.
    /// </summary>
    /// <param name="lastResponse">The response of the final iteration; the source of all copied metadata.</param>
    /// <param name="transcript">The messages accumulated across every iteration, in order.</param>
    private AgentResponse BuildResult(AgentResponse lastResponse, List<ChatMessage> transcript)
    {
        if (this._nonStreamingReturnsLastResponseOnly)
        {
            return lastResponse;
        }

        // Metadata (including Usage) is copied from the final iteration only; it is not summed across iterations.
        return new AgentResponse(transcript)
        {
            AgentId = lastResponse.AgentId,
            ResponseId = lastResponse.ResponseId,
            CreatedAt = lastResponse.CreatedAt,
            FinishReason = lastResponse.FinishReason,
            Usage = lastResponse.Usage,
            AdditionalProperties = lastResponse.AdditionalProperties,
            ContinuationToken = lastResponse.ContinuationToken,
        };
    }

    /// <summary>
    /// Returns <see langword="true"/> when any message in <paramref name="response"/> contains a
    /// <see cref="ToolApprovalRequestContent"/> item.
    /// </summary>
    private static bool HasPendingApprovalRequests(AgentResponse response)
    {
        foreach (ChatMessage message in response.Messages)
        {
            foreach (AIContent content in message.Contents)
            {
                if (content is ToolApprovalRequestContent)
                {
                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Logs, at information level, that the loop stopped because the iteration cap was reached.
    /// </summary>
    /// <param name="iteration">The number of completed iterations, reported as the value of the cap.</param>
    private void LogMaxIterationsReached(int iteration)
    {
        if (this._logger.IsEnabled(LogLevel.Information))
        {
            this._logger.LogInformation("LoopAgent reached the maximum of {MaxIterations} iterations and stopped.", iteration);
        }
    }

    /// <summary>
    /// Creates the session used for the next iteration when a fresh context is requested. A caller-supplied session is
    /// restored from the pristine start-of-run snapshot by deserializing a fresh clone; a loop-owned session (no
    /// snapshot) is created anew. The configured session-created callback is notified of the new session.
    /// </summary>
    private async ValueTask<AgentSession> CreateFreshIterationSessionAsync(LoopContext context, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken)
    {
        AgentSession session = initialSessionSnapshot is { } snapshot
            ?
await this.InnerAgent.DeserializeSessionAsync(snapshot, cancellationToken: cancellationToken).ConfigureAwait(false) + : await context.Agent.CreateSessionAsync(cancellationToken).ConfigureAwait(false); + + await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false); + return session; + } + + /// + /// Invokes the configured (if any) with a session the loop + /// has just created, so the caller can observe the latest session. + /// + private async ValueTask NotifyNewSessionAsync(AgentSession session, CancellationToken cancellationToken) + { + if (this._sessionCreatedCallback is not null) + { + await this._sessionCreatedCallback(session, cancellationToken).ConfigureAwait(false); + } + } + + /// Represents the loop's decision for the next iteration: stop, or continue with a set of messages. + private readonly struct LoopNextStep + { + private LoopNextStep(bool shouldContinue, IReadOnlyList messages, IReadOnlyList surfacedMessages) + { + this.ShouldContinue = shouldContinue; + this.Messages = messages; + this.SurfacedMessages = surfacedMessages; + } + + public bool ShouldContinue { get; } + + /// Gets the full set of messages sent to the wrapped agent for the next iteration. + public IReadOnlyList Messages { get; } + + /// + /// Gets the subset of the loop synthesized on the caller's behalf (feedback or + /// evaluator-supplied messages) that should be surfaced to the caller. Replayed caller input is excluded. 
+ /// + public IReadOnlyList SurfacedMessages { get; } + + public static LoopNextStep Stop() => new(shouldContinue: false, [], []); + + public static LoopNextStep Continue(IReadOnlyList messages, IReadOnlyList surfacedMessages) + => new(shouldContinue: true, messages, surfacedMessages); + } +} diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgentOptions.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgentOptions.cs new file mode 100644 index 0000000000..ec009b4594 --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopAgentOptions.cs @@ -0,0 +1,117 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Agents.AI; + +/// +/// Provides configuration options for . +/// +[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] +public sealed class LoopAgentOptions +{ + /// + /// Gets or sets the global safety cap on the number of times the wrapped agent is invoked in a single loop run, + /// or to use . + /// + /// + /// This is an absolute upper bound that applies regardless of the configured set. An + /// evaluator may stop the loop earlier, but no evaluator can cause the loop to exceed this cap, so raise this value + /// if you intend to allow longer loops. + /// + public int? MaxIterations { get; set; } + + /// + /// Gets or sets a value indicating whether each re-invocation restarts from a clean context: the original input + /// messages plus an aggregated feedback log, rather than the latest feedback appended to the prior conversation. + /// Defaults to . + /// + /// + /// + /// This rebuilds the input messages each iteration and resets the session before each re-invocation so no + /// prior conversation history leaks across iterations. When the loop owns the session it creates a new one each + /// iteration. 
When the caller supplies a session, serializes it once at the start of the run + /// and restores a fresh clone (by deserializing that snapshot) before each re-invocation; this requires the wrapped + /// agent to support session serialization. The first iteration still runs against the caller's supplied session. + /// + /// + /// Note that cloning will only result in a fresh context, if the chat history storage mechanism supports cloning. + /// For example the default in-memory storage supports cloning, since the messages are serialized as part of the snapshot. + /// + /// + /// However, if the Conversations service is used, which stores messages in a single threaded list of messages, + /// then the cloned session will still contain the full message history, since the snapshot only captures an id reference + /// to the conversation and not the individual messages. + /// + /// + /// On the other hand, if responses are used with response ids, cloning will work well, since response ids are + /// forkable. Each new response has its own id, and is based on the id of the previous response. + /// + /// + /// On iterations where an evaluator returns explicit messages via + /// , the session is still reset (a fresh or cloned session is + /// used); only the rebuild of the input messages from the feedback log is skipped, because the evaluator's explicit + /// messages are sent verbatim. + /// + /// + public bool FreshContextPerIteration { get; set; } + + /// + /// Gets or sets the author name stamped on the loop-synthesized "on-behalf-of" messages that the loop injects + /// into the wrapped agent for re-invocations, or to leave them unattributed. Defaults to + /// . + /// + /// + /// When the loop re-invokes the wrapped agent it sends feedback messages on the caller's behalf. Setting this name + /// marks those autonomous messages (for example with a value such as "loop") so that callers and the wrapped + /// agent can distinguish them from the caller's own turns. 
It is applied only to messages the loop synthesizes + /// itself; messages supplied explicitly by an evaluator via are + /// left untouched, and the caller's original input messages are never modified. + /// + public string? OnBehalfOfAuthorName { get; set; } + + /// + /// Gets or sets a value indicating whether the on-behalf-of messages the loop injects for re-invocations are + /// omitted from the output surfaced back to the caller. Defaults to . + /// + /// + /// When (the default) a streaming run emits the injected feedback / evaluator-supplied + /// messages as updates before each re-invocation, and a non-streaming run includes them in the aggregated + /// transcript, so callers can see the loop acting autonomously on their behalf. Set this to + /// to omit those messages from the returned output and surface only the wrapped agent's responses; the messages are + /// still sent to the wrapped agent. This setting has no effect when + /// causes a non-streaming run to return only the final response. + /// + public bool ExcludeOnBehalfOfMessages { get; set; } + + /// + /// Gets or sets a value indicating whether a non-streaming run returns only the final iteration's response instead + /// of the aggregated transcript of every iteration. Defaults to . + /// + /// + /// By default a non-streaming run returns a single that + /// aggregates, in order, the on-behalf-of messages the loop injected and the responses produced by every + /// iteration — mirroring the full sequence of updates yielded by a streaming run. Set this to + /// to instead return only the last iteration's . This setting affects non-streaming runs + /// only; streaming runs always yield every iteration's updates. + /// + public bool NonStreamingReturnsLastResponseOnly { get; set; } + + /// + /// Gets or sets an optional callback invoked whenever creates a new session, so the caller + /// can capture the latest session (for example to continue the conversation after the loop completes). 
Defaults to + /// . + /// + /// + /// The callback is invoked with each session the loop itself creates: the initial loop-owned session (when the + /// caller does not supply one) and, when is enabled, every session created + /// for a re-invocation — whether a brand-new loop-owned session or a fresh clone deserialized from the caller's + /// original session. It is not invoked for a caller-supplied session, since the caller already holds that one. When + /// it fires multiple times, the most recent invocation carries the session the loop is currently using. + /// + public Func? SessionCreatedCallback { get; set; } +} diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopContext.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopContext.cs new file mode 100644 index 0000000000..d0bdf03e7b --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopContext.cs @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.AI; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Agents.AI; + +/// +/// Provides the per-run state that a uses to decide whether a +/// should re-invoke the wrapped agent and what feedback to provide. +/// +/// +/// A single instance is created for each run and is +/// reused across iterations, with and updated before +/// each call to . Because evaluator instances are expected to be +/// stateless and may be shared across concurrent runs, any per-run mutable state must be stored on this +/// context — for example via — rather than in fields on the evaluator itself. +/// +[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] +public sealed class LoopContext +{ + /// + /// Initializes a new instance of the class. + /// + /// The wrapped that is being looped. + /// The used for the loop. + /// The messages passed in for the first iteration of the loop. 
+ /// The AgentResponse produced by the iteration that just completed. + /// The AgentRunOptions that were passed to the loop run, if any. + /// + /// agent, session, initialMessages, or + /// lastResponse is null. + /// + public LoopContext( + AIAgent agent, + AgentSession session, + IReadOnlyList initialMessages, + AgentResponse lastResponse, + AgentRunOptions? runOptions = null) + { + this.Agent = Throw.IfNull(agent); + this.Session = Throw.IfNull(session); + this.InitialMessages = Throw.IfNull(initialMessages); + this.LastResponse = Throw.IfNull(lastResponse); + this.RunOptions = runOptions; + } + + /// Gets the wrapped AIAgent that is being looped. + public AIAgent Agent { get; } + + /// Gets the AgentSession used for the loop. + /// + /// When the caller does not provide a session, LoopAgent creates one up front. By default the same + /// session is reused across every iteration so that conversation continuity is preserved and the original request + /// is not replayed. When FreshContextPerIteration is enabled, LoopAgent + /// resets the session before each re-invocation: a loop-owned session is created anew, while a caller-supplied + /// session is restored from a snapshot taken at the start of the run by deserializing a fresh clone. + /// + public AgentSession Session { get; internal set; } + + /// Gets the messages that were passed in for the first iteration of the loop. + public IReadOnlyList InitialMessages { get; } + + /// Gets the AgentRunOptions that were passed to the loop run, if any. + public AgentRunOptions? RunOptions { get; } + + /// Gets the number of completed agent runs so far (1-based after the first run). + public int Iteration { get; internal set; } + + /// Gets the AgentResponse produced by the iteration that just completed. + public AgentResponse LastResponse { get; internal set; } + + /// + /// Gets the feedback accumulated across iterations so far, one entry per re-invoked iteration in order.
+ /// + /// + /// Each entry is the feedback supplied by the evaluator that requested the corresponding re-invocation, or + /// null when that iteration produced no feedback string (for example a plain LoopEvaluation.Continue + /// with no text, or a LoopEvaluation.ContinueWithMessages + /// that supplied explicit messages instead). The log records one entry per re-invoked iteration regardless of mode, + /// so the last entry always corresponds to the most recent re-invoked iteration. This log is owned and populated by + /// LoopAgent; evaluators may read it to reason over prior feedback. + /// + public IReadOnlyList Feedback { get; internal set; } = []; + + /// + /// Gets a mutable bag of per-run state shared across iterations and available to every evaluator. + /// + /// + /// This dictionary is owned by the loop run (not by any evaluator instance) so that evaluators can remain + /// stateless. Evaluators can stash arbitrary per-run state here keyed by a collision-resistant key. + /// + public AdditionalPropertiesDictionary AdditionalProperties { get; } = new(); +} diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluation.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluation.cs new file mode 100644 index 0000000000..2d8de152e8 --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluation.cs @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using Microsoft.Extensions.AI; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Agents.AI; + +/// +/// Represents the result produced by a LoopEvaluator after an agent iteration: whether the LoopAgent +/// should re-invoke the wrapped agent and, optionally, the feedback or explicit messages that +/// should inform the next iteration. +/// +/// +/// An evaluator is concerned only with the judgment (continue or stop) and what to carry forward.
In the common case + /// it returns a feedback string and lets the LoopAgent decide how that feedback is turned into the next + /// input (and whether the session is reset). For full control, ContinueWithMessages supplies the exact + /// messages to send next, bypassing the loop's feedback and message construction. + /// +[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] +public sealed class LoopEvaluation +{ + private static readonly LoopEvaluation s_stop = new(shouldReinvoke: false, feedback: null, messages: null); + + private LoopEvaluation(bool shouldReinvoke, string? feedback, IReadOnlyList? messages) + { + this.ShouldReinvoke = shouldReinvoke; + this.Feedback = feedback; + this.Messages = messages; + } + + /// Gets a value indicating whether the loop should run the wrapped agent again. + public bool ShouldReinvoke { get; } + + /// + /// Gets the feedback describing what is missing or what the agent should do next, or null when + /// no feedback was produced. + /// + /// This value is only meaningful when ShouldReinvoke is true. + public string? Feedback { get; } + + /// + /// Gets the explicit messages to send on the next iteration, or null when the loop should build + /// the next input from feedback instead. + /// + /// + /// When non-null, the LoopAgent sends these messages verbatim and does not apply + /// its feedback or message construction. The session is still reset when + /// FreshContextPerIteration is enabled. Only meaningful when ShouldReinvoke + /// is true. + /// + internal IReadOnlyList? Messages { get; } + + /// Creates an evaluation that stops the loop and returns the latest response to the caller. + /// An evaluation with ShouldReinvoke set to false. + public static LoopEvaluation Stop() => s_stop; + + /// Creates an evaluation that re-invokes the wrapped agent, optionally carrying feedback forward. + /// + /// Optional feedback to inform the next iteration. null, empty, or whitespace is treated as no + /// feedback. + /// + /// An evaluation with ShouldReinvoke set to true. + public static LoopEvaluation Continue(string?
feedback = null) => new(shouldReinvoke: true, string.IsNullOrWhiteSpace(feedback) ? null : feedback, messages: null); + + /// + /// Creates an evaluation that re-invokes the wrapped agent with the specified messages, bypassing the loop's + /// feedback and message construction. + /// + /// The messages to send to the wrapped agent on the next iteration. + /// An evaluation with ShouldReinvoke set to true. + /// messages is null. + /// + /// Use this for full control over the next input (for example to send non-user roles, multiple messages, or + /// non-text content). The supplied messages are sent verbatim and the loop does not accumulate or inject feedback + /// for this iteration. + /// + public static LoopEvaluation ContinueWithMessages(IEnumerable messages) + { + _ = Throw.IfNull(messages); + return new LoopEvaluation(shouldReinvoke: true, feedback: null, messages: messages as IReadOnlyList ?? messages.ToList()); + } +} diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluator.cs new file mode 100644 index 0000000000..328c99e80c --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopEvaluator.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Agents.AI; + +/// +/// Provides the abstract base class for the component that decides, after each agent iteration, whether a +/// LoopAgent should re-invoke the wrapped agent and what feedback to provide. +/// +/// +/// +/// A LoopEvaluator is pure judgment: it inspects the LoopContext and returns a +/// LoopEvaluation describing whether to continue and any feedback for the next iteration. It does not +/// manage the session or construct the next input messages — that is the responsibility of the +/// LoopAgent that consumes it. +/// +/// +/// Out-of-the-box implementations include CompletionMarkerLoopEvaluator, DelegateLoopEvaluator, +/// and AIJudgeLoopEvaluator.
Implementations should be stateless and safe to share across + /// concurrent loop runs; any per-run state must be stored on the supplied LoopContext. + /// + /// +[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] +public abstract class LoopEvaluator +{ + /// + /// Evaluates the loop state after an iteration and decides whether to re-invoke the wrapped agent and what + /// feedback to provide. + /// + /// The per-run LoopContext describing the current loop state. + /// The CancellationToken to monitor for cancellation requests. + /// + /// A value task whose result is a LoopEvaluation indicating whether to continue and, if so, the + /// feedback to carry forward to the next iteration. + /// + public abstract ValueTask EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default); +} diff --git a/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopJsonContext.cs b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopJsonContext.cs new file mode 100644 index 0000000000..8d69383e3f --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI/Harness/Loop/LoopJsonContext.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Microsoft.Agents.AI; + +/// +/// Source-generated JsonSerializerContext for loop types that require JSON serialization, such as the +/// structured JudgeVerdict used by AIJudgeLoopEvaluator. +/// +[JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] +[JsonSerializable(typeof(JudgeVerdict))] +[ExcludeFromCodeCoverage] +internal sealed partial class LoopJsonContext : JsonSerializerContext; diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/AIJudgeLoopEvaluatorTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/AIJudgeLoopEvaluatorTests.cs new file mode 100644 index 0000000000..d91494ba03 --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/AIJudgeLoopEvaluatorTests.cs @@ -0,0 +1,314 @@ +// Copyright (c) Microsoft. All rights reserved.
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Moq; + +using static Microsoft.Agents.AI.UnitTests.LoopTestHelpers; + +namespace Microsoft.Agents.AI.UnitTests; + +/// +/// Unit tests for the AIJudgeLoopEvaluator class. +/// +public class AIJudgeLoopEvaluatorTests +{ + /// + /// Verify that the evaluator stops when the judge reports the request was answered. + /// + [Fact] + public async Task EvaluateAsync_Answered_StopsAsync() + { + // Arrange + var judgeClient = CreateJudgeClient("{\"answered\":true}"); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.False(evaluation.ShouldReinvoke); + } + + /// + /// Verify that when not answered the evaluator continues with feedback carrying the judge's gap analysis. + /// + [Fact] + public async Task EvaluateAsync_NotAnswered_ContinuesWithGapAnalysisAsync() + { + // Arrange + var judgeClient = CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"the cost estimate is missing\"}"); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.NotNull(evaluation.Feedback); + Assert.Contains("the cost estimate is missing", evaluation.Feedback!); + Assert.DoesNotContain(AIJudgeLoopEvaluator.GapAnalysisPlaceholder, evaluation.Feedback!); + } + + /// + /// Verify that the evaluator falls back to text parsing and stops when the DONE verdict marker is present.
+ /// + [Fact] + public async Task EvaluateAsync_TextFallback_StopsWhenAnsweredAsync() + { + // Arrange + var judgeClient = CreateJudgeClient(AIJudgeLoopEvaluator.DoneVerdictMarker); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.False(evaluation.ShouldReinvoke); + } + + /// + /// Verify that the gap-analysis placeholder is filled with a fallback token when no structured output is produced. + /// NOTE(review): the Assert.Contains literal in this test is an empty string as shown here, which passes for any feedback; confirm the expected fallback token was not lost from the literal. + /// + [Fact] + public async Task EvaluateAsync_NotAnswered_TextFallback_InjectsUnknownGapAnalysisAsync() + { + // Arrange + var judgeClient = CreateJudgeClient(AIJudgeLoopEvaluator.MoreVerdictMarker); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.Contains("", evaluation.Feedback!); + } + + /// + /// Verify that the text fallback keeps looping for replies that merely contain the substring "ANSWERED" (for + /// example "UNANSWERED" or "NOT ANSWERED") rather than the explicit DONE verdict marker. + /// + [Theory] + [InlineData("UNANSWERED")] + [InlineData("NOT ANSWERED")] + [InlineData("The request is not yet answered.")] + public async Task EvaluateAsync_TextFallback_AmbiguousReply_ContinuesAsync(string reply) + { + // Arrange + var judgeClient = CreateJudgeClient(reply); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + } + + /// + /// Verify that custom instructions from options are sent to the judge client. + /// + [Fact] + public async Task EvaluateAsync_CustomInstructions_AreSentToJudgeAsync() + { + // Arrange + List?
judgeMessages = null; + var judgeMock = new Mock(); + judgeMock.Setup(c => c.GetResponseAsync( + It.IsAny>(), + It.IsAny(), + It.IsAny())) + .Callback, ChatOptions?, CancellationToken>((msgs, _, _) => judgeMessages = msgs.ToList()) + .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}"))); + var evaluator = new AIJudgeLoopEvaluator(judgeMock.Object, new AIJudgeLoopEvaluatorOptions { Instructions = "CUSTOM JUDGE PROMPT" }); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + Assert.NotNull(judgeMessages); + Assert.Contains(judgeMessages!, m => m.Role == ChatRole.System && m.Text == "CUSTOM JUDGE PROMPT"); + } + + /// + /// Verify that a custom feedback message template from options is honored. + /// + [Fact] + public async Task EvaluateAsync_CustomFeedbackMessageTemplate_IsHonoredAsync() + { + // Arrange + var judgeClient = CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"add unit tests\"}"); + const string Template = "Please address: " + AIJudgeLoopEvaluator.GapAnalysisPlaceholder; + var evaluator = new AIJudgeLoopEvaluator(judgeClient, new AIJudgeLoopEvaluatorOptions { FeedbackMessageTemplate = Template }); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.Equal("Please address: add unit tests", evaluation.Feedback); + } + + /// + /// Verify that non-text content in the original request (for example an image) is forwarded to the judge + /// rather than being silently dropped when flattening the request to text. + /// + [Fact] + public async Task EvaluateAsync_NonTextRequestContent_IsForwardedToJudgeAsync() + { + // Arrange + List? 
judgeMessages = null; + var judgeMock = new Mock(); + judgeMock.Setup(c => c.GetResponseAsync( + It.IsAny>(), + It.IsAny(), + It.IsAny())) + .Callback, ChatOptions?, CancellationToken>((msgs, _, _) => judgeMessages = msgs.ToList()) + .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}"))); + var evaluator = new AIJudgeLoopEvaluator(judgeMock.Object); + var imageContent = new DataContent(new byte[] { 1, 2, 3, 4 }, "image/png"); + var context = new LoopContext( + new Mock().Object, + new ChatClientAgentSession(), + [new ChatMessage(ChatRole.User, [imageContent])], + new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")])); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + Assert.NotNull(judgeMessages); + ChatMessage userMessage = Assert.Single(judgeMessages!, m => m.Role == ChatRole.User); + Assert.Contains(userMessage.Contents.OfType(), c => c.MediaType == "image/png"); + } + + /// + /// Verify that the constructor throws when the judge client is null. + /// + [Fact] + public void AIJudgeLoopEvaluator_NullClient_Throws() + { + // Act & Assert + Assert.Throws("judgeClient", () => new AIJudgeLoopEvaluator(null!)); + } + + /// + /// Verify that EvaluateAsync throws when the context is null. + /// + [Fact] + public async Task EvaluateAsync_NullContext_ThrowsAsync() + { + // Arrange + var evaluator = new AIJudgeLoopEvaluator(CreateJudgeClient("{\"answered\":true}")); + + // Act & Assert + await Assert.ThrowsAsync("context", async () => await evaluator.EvaluateAsync(null!)); + } + + /// + /// Verify that supplied criteria are rendered into the default judge instructions as a bullet list and the + /// placeholder is consumed. 
+ /// + [Fact] + public async Task EvaluateAsync_Criteria_AreRenderedIntoDefaultInstructionsAsync() + { + // Arrange + var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List judgeMessages); + var options = new AIJudgeLoopEvaluatorOptions { Criteria = ["Must cite sources", "Must be under 200 words"] }; + var evaluator = new AIJudgeLoopEvaluator(judgeClient, options); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text; + Assert.Contains("The response must satisfy all of the following criteria:", system); + Assert.Contains("- Must cite sources", system); + Assert.Contains("- Must be under 200 words", system); + Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system); + } + + /// + /// Verify that when no criteria are supplied the placeholder is removed and no criteria block is added to the + /// default instructions. + /// + [Fact] + public async Task EvaluateAsync_NoCriteria_LeavesDefaultInstructionsWithoutCriteriaBlockAsync() + { + // Arrange + var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List judgeMessages); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text; + Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system); + Assert.DoesNotContain("The response must satisfy all of the following criteria:", system); + } + + /// + /// Verify that criteria are injected at the placeholder location in custom instructions. 
+ /// + [Fact] + public async Task EvaluateAsync_CustomInstructionsWithPlaceholder_InjectsCriteriaAsync() + { + // Arrange + var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List judgeMessages); + const string Instructions = "Judge the answer." + AIJudgeLoopEvaluator.CriteriaPlaceholder + " Be strict."; + var options = new AIJudgeLoopEvaluatorOptions { Instructions = Instructions, Criteria = ["Must include code"] }; + var evaluator = new AIJudgeLoopEvaluator(judgeClient, options); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text; + Assert.StartsWith("Judge the answer.", system); + Assert.EndsWith("Be strict.", system); + Assert.Contains("- Must include code", system); + Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system); + } + + /// + /// Verify that custom instructions without the placeholder do not receive the criteria. 
+ /// + [Fact] + public async Task EvaluateAsync_CustomInstructionsWithoutPlaceholder_OmitsCriteriaAsync() + { + // Arrange + var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List judgeMessages); + const string Instructions = "Judge the answer and be strict."; + var options = new AIJudgeLoopEvaluatorOptions { Instructions = Instructions, Criteria = ["Must include code"] }; + var evaluator = new AIJudgeLoopEvaluator(judgeClient, options); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text; + Assert.Equal(Instructions, system); + } + + private static LoopContext CreateContext() => new( + new Mock().Object, + new ChatClientAgentSession(), + [new ChatMessage(ChatRole.User, "original question")], + new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")])); +} diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/CompletionMarkerLoopEvaluatorTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/CompletionMarkerLoopEvaluatorTests.cs new file mode 100644 index 0000000000..81f6cc532f --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/CompletionMarkerLoopEvaluatorTests.cs @@ -0,0 +1,145 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Moq; + +namespace Microsoft.Agents.AI.UnitTests; + +/// +/// Unit tests for the CompletionMarkerLoopEvaluator class. +/// +public class CompletionMarkerLoopEvaluatorTests +{ + /// + /// Verify that the constructor throws when the marker is null, empty, or whitespace. + /// + /// The invalid marker value. + [Theory] + [InlineData(null)] + [InlineData("")] + [InlineData(" ")] + public void CompletionMarkerLoopEvaluator_InvalidMarker_Throws(string?
marker) + { + // Act & Assert + Assert.ThrowsAny(() => new CompletionMarkerLoopEvaluator(marker!)); + } + + /// + /// Verify that the evaluator stops the loop when the marker appears in the latest response. + /// + [Fact] + public async Task EvaluateAsync_MarkerPresent_StopsAsync() + { + // Arrange + var evaluator = new CompletionMarkerLoopEvaluator("DONE"); + LoopContext context = CreateContext("all DONE here"); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.False(evaluation.ShouldReinvoke); + } + + /// + /// Verify that the evaluator continues with default feedback (containing the marker) when the marker is absent. + /// + [Fact] + public async Task EvaluateAsync_MarkerAbsent_ContinuesWithDefaultFeedbackAsync() + { + // Arrange + var evaluator = new CompletionMarkerLoopEvaluator("DONE"); + LoopContext context = CreateContext("still working"); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.NotNull(evaluation.Feedback); + Assert.Contains("DONE", evaluation.Feedback!); + Assert.DoesNotContain(CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder, evaluation.Feedback!); + } + + /// + /// Verify that a custom feedback template is honored, with the completion marker substituted for the placeholder. 
+ /// + [Fact] + public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_IsHonoredAsync() + { + // Arrange + const string Template = "Keep going and finish with " + CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when done."; + var evaluator = new CompletionMarkerLoopEvaluator("FINISHED", new CompletionMarkerLoopEvaluatorOptions { FeedbackMessageTemplate = Template }); + LoopContext context = CreateContext("still working"); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.Equal("Keep going and finish with FINISHED when done.", evaluation.Feedback); + } + + /// + /// Verify that a custom feedback template containing the last-response placeholder echoes the agent's latest + /// response text, with no leftover placeholder. + /// + [Fact] + public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_SubstitutesLastResponseAsync() + { + // Arrange + const string Template = "Your previous attempt was: '" + CompletionMarkerLoopEvaluator.LastResponsePlaceholder + + "'. Improve it and finish with " + CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when done."; + var evaluator = new CompletionMarkerLoopEvaluator("FINISHED", new CompletionMarkerLoopEvaluatorOptions { FeedbackMessageTemplate = Template }); + LoopContext context = CreateContext("candidate name: NoteNest"); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.Equal("Your previous attempt was: 'candidate name: NoteNest'. Improve it and finish with FINISHED when done.", evaluation.Feedback); + Assert.DoesNotContain(CompletionMarkerLoopEvaluator.LastResponsePlaceholder, evaluation.Feedback!); + } + + /// + /// Verify that the default feedback template does not include the agent's latest response text (the last-response + /// placeholder is opt-in via a custom template). 
+ /// + [Fact] + public async Task EvaluateAsync_MarkerAbsent_DefaultTemplate_DoesNotIncludeLastResponseAsync() + { + // Arrange + var evaluator = new CompletionMarkerLoopEvaluator("DONE"); + LoopContext context = CreateContext("candidate name: NoteNest"); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(evaluation.ShouldReinvoke); + Assert.Equal(CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate.Replace(CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder, "DONE"), evaluation.Feedback); + Assert.DoesNotContain("NoteNest", evaluation.Feedback!); + } + + /// + /// Verify that EvaluateAsync throws when the context is null. + /// + [Fact] + public async Task EvaluateAsync_NullContext_ThrowsAsync() + { + // Arrange + var evaluator = new CompletionMarkerLoopEvaluator("DONE"); + + // Act & Assert + await Assert.ThrowsAsync("context", async () => await evaluator.EvaluateAsync(null!)); + } + + private static LoopContext CreateContext(string responseText) => new( + new Mock().Object, + new ChatClientAgentSession(), + [new ChatMessage(ChatRole.User, "go")], + new AgentResponse([new ChatMessage(ChatRole.Assistant, responseText)])); +} diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/DelegateLoopEvaluatorTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/DelegateLoopEvaluatorTests.cs new file mode 100644 index 0000000000..8718fe9250 --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/DelegateLoopEvaluatorTests.cs @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Moq; + +namespace Microsoft.Agents.AI.UnitTests; + +/// +/// Unit tests for the DelegateLoopEvaluator class. +/// +public class DelegateLoopEvaluatorTests +{ + /// + /// Verify that the constructor throws when the evaluate delegate is null.
+ /// + [Fact] + public void DelegateLoopEvaluator_NullDelegate_Throws() + { + // Act & Assert + Assert.Throws("evaluate", () => new DelegateLoopEvaluator(null!)); + } + + /// + /// Verify that EvaluateAsync throws when the context is null. + /// + [Fact] + public async Task EvaluateAsync_NullContext_ThrowsAsync() + { + // Arrange + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Stop())); + + // Act & Assert + await Assert.ThrowsAsync("context", async () => await evaluator.EvaluateAsync(null!)); + } + + /// + /// Verify that EvaluateAsync invokes the supplied delegate and returns the evaluation it produces. + /// + [Fact] + public async Task EvaluateAsync_InvokesDelegate_AndReturnsItsEvaluationAsync() + { + // Arrange + bool invoked = false; + var expected = LoopEvaluation.Continue("feedback"); + var evaluator = new DelegateLoopEvaluator((_, _) => + { + invoked = true; + return new ValueTask(expected); + }); + LoopContext context = CreateContext(); + + // Act + LoopEvaluation evaluation = await evaluator.EvaluateAsync(context); + + // Assert + Assert.True(invoked); + Assert.Same(expected, evaluation); + } + + /// + /// Verify that EvaluateAsync passes the same context instance to the delegate. + /// + [Fact] + public async Task EvaluateAsync_PassesContextToDelegateAsync() + { + // Arrange + LoopContext? received = null; + var evaluator = new DelegateLoopEvaluator((ctx, _) => + { + received = ctx; + return new ValueTask(LoopEvaluation.Stop()); + }); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context); + + // Assert + Assert.Same(context, received); + } + + /// + /// Verify that EvaluateAsync forwards the cancellation token to the delegate. 
+ /// + [Fact] + public async Task EvaluateAsync_ForwardsCancellationTokenToDelegateAsync() + { + // Arrange + using var cts = new CancellationTokenSource(); + CancellationToken received = default; + var evaluator = new DelegateLoopEvaluator((_, ct) => + { + received = ct; + return new ValueTask(LoopEvaluation.Stop()); + }); + LoopContext context = CreateContext(); + + // Act + await evaluator.EvaluateAsync(context, cts.Token); + + // Assert + Assert.Equal(cts.Token, received); + } + + private static LoopContext CreateContext() => new( + new Mock().Object, + new ChatClientAgentSession(), + [new ChatMessage(ChatRole.User, "go")], + new AgentResponse([new ChatMessage(ChatRole.Assistant, "response")])); +} diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopAgentTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopAgentTests.cs new file mode 100644 index 0000000000..428298f1d6 --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopAgentTests.cs @@ -0,0 +1,1231 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Moq; +using Moq.Protected; + +using static Microsoft.Agents.AI.UnitTests.LoopTestHelpers; + +namespace Microsoft.Agents.AI.UnitTests; + +/// +/// Unit tests for the LoopAgent class. +/// +public class LoopAgentTests +{ + #region Constructor + + /// + /// Verify that the constructor throws when innerAgent is null. + /// + [Fact] + public void Constructor_NullInnerAgent_Throws() + { + // Arrange + var evaluator = While(static _ => false); + + // Act & Assert + Assert.Throws("innerAgent", () => new LoopAgent(null!, evaluator)); + } + + /// + /// Verify that the constructor throws when the evaluator is null.
+ /// + [Fact] + public void Constructor_NullEvaluator_Throws() + { + // Arrange + var innerAgent = new Mock().Object; + + // Act & Assert + Assert.Throws("evaluator", () => new LoopAgent(innerAgent, (LoopEvaluator)null!)); + } + + /// + /// Verify that the constructor throws when the evaluators collection is null. + /// + [Fact] + public void Constructor_NullEvaluators_Throws() + { + // Arrange + var innerAgent = new Mock().Object; + + // Act & Assert + Assert.Throws("evaluators", () => new LoopAgent(innerAgent, (IEnumerable)null!)); + } + + /// + /// Verify that the constructor throws when the evaluators collection is empty. + /// + [Fact] + public void Constructor_EmptyEvaluators_Throws() + { + // Arrange + var innerAgent = new Mock().Object; + + // Act & Assert + Assert.Throws("evaluators", () => new LoopAgent(innerAgent, Array.Empty())); + } + + /// + /// Verify that the constructor throws when the evaluators collection contains a null element. + /// + [Fact] + public void Constructor_NullEvaluatorElement_Throws() + { + // Arrange + var innerAgent = new Mock().Object; + + // Act & Assert + Assert.Throws("evaluators", () => new LoopAgent(innerAgent, new LoopEvaluator[] { null! })); + } + + /// + /// Verify that the constructor throws when MaxIterations is less than 1. + /// + [Fact] + public void Constructor_InvalidMaxIterations_Throws() + { + // Arrange + var innerAgent = new Mock().Object; + var evaluator = While(static _ => false); + var options = new LoopAgentOptions { MaxIterations = 0 }; + + // Act & Assert + Assert.Throws(() => new LoopAgent(innerAgent, evaluator, options)); + } + + /// + /// Verify that the constructor creates a valid instance with default options. 
+ /// + [Fact] + public void Constructor_ValidArguments_CreatesInstance() + { + // Arrange + var innerAgent = new Mock().Object; + var evaluator = While(static _ => false); + + // Act + var agent = new LoopAgent(innerAgent, evaluator); + + // Assert + Assert.NotNull(agent); + } + + #endregion + + #region RunAsync - core loop behavior + + /// + /// Verify that when the evaluator stops immediately the inner agent is invoked exactly once. + /// + [Fact] + public async Task RunAsync_EvaluatorStopsImmediately_InvokesOnceAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "done")])); + var evaluator = While(static _ => false); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal("done", response.Text); + Assert.Equal(1, capture.CallCount); + } + + /// + /// Verify that the loop re-invokes while the predicate returns true and the aggregated response contains every + /// iteration's messages in order. + /// + [Fact] + public async Task RunAsync_PredicateLoopsUntilFalse_AggregatesAllIterationsAsync() + { + // Arrange + var capture = new InnerAgentCapture(call => + new AgentResponse([new ChatMessage(ChatRole.Assistant, $"iteration {call}")])); + + // Continue while the latest response is not "iteration 3". + var evaluator = While(ctx => ctx.LastResponse.Text != "iteration 3"); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.Equal(["iteration 1", "iteration 2", "iteration 3"], response.Messages.Select(static m => m.Text)); + } + + /// + /// Verify that <see cref="LoopAgentOptions.NonStreamingReturnsLastResponseOnly"/> returns only the final + /// iteration's response instead of the aggregated transcript. 
+ /// + [Fact] + public async Task RunAsync_LastResponseOnly_ReturnsFinalResponseAsync() + { + // Arrange + var capture = new InnerAgentCapture(call => + new AgentResponse([new ChatMessage(ChatRole.Assistant, $"iteration {call}")])); + var evaluator = While(ctx => ctx.LastResponse.Text != "iteration 3"); + var options = new LoopAgentOptions { NonStreamingReturnsLastResponseOnly = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.Equal("iteration 3", response.Text); + Assert.Single(response.Messages); + } + + /// + /// Verify that the caller's initial messages are sent once and a re-invocation without feedback sends none. + /// + [Fact] + public async Task RunAsync_ContinueWithoutFeedback_SendsInitialOnceThenNoneAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")])); + var evaluator = new DelegateLoopEvaluator((ctx, _) => + new ValueTask( + ctx.Iteration < 2 ? LoopEvaluation.Continue() : LoopEvaluation.Stop())); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal("original", capture.MessagesPerCall[0].Single().Text); + Assert.Empty(capture.MessagesPerCall[1]); + } + + /// + /// Verify that feedback supplied by the evaluator is injected verbatim on re-invocation (non-fresh mode). + /// + [Fact] + public async Task RunAsync_EvaluatorSuppliesFeedback_InjectsItVerbatimAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")])); + var evaluator = new DelegateLoopEvaluator((ctx, _) => + new ValueTask( + ctx.Iteration < 2 ? 
LoopEvaluation.Continue("custom follow-up") : LoopEvaluation.Stop())); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal("custom follow-up", capture.MessagesPerCall[1].Single().Text); + } + + /// + /// Verify that an evaluator using <see cref="LoopEvaluation.ContinueWithMessages"/> sends the messages verbatim and + /// records an aligned <see langword="null"/> feedback entry (it carries no feedback string). + /// + [Fact] + public async Task RunAsync_ContinueWithMessages_SendsMessagesVerbatimAndRecordsNullFeedbackAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")])); + IReadOnlyList? feedbackSnapshot = null; + var evaluator = new DelegateLoopEvaluator((ctx, _) => + { + if (ctx.Iteration < 2) + { + return new ValueTask(LoopEvaluation.ContinueWithMessages( + [new ChatMessage(ChatRole.System, "sys"), new ChatMessage(ChatRole.User, "explicit")])); + } + + feedbackSnapshot = ctx.Feedback.ToList(); + return new ValueTask(LoopEvaluation.Stop()); + }); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal(["sys", "explicit"], capture.MessagesPerCall[1].Select(static m => m.Text)); + Assert.NotNull(feedbackSnapshot); + // One aligned entry for the single re-invoked iteration; null because ContinueWithMessages carries no feedback string. + Assert.Equal([null], feedbackSnapshot!); + } + + /// + /// Verify that the global safety cap stops the loop even when the evaluator always continues. 
+ /// + [Fact] + public async Task RunAsync_AlwaysContinue_StopsAtGlobalCapAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "working")])); + // The evaluator never asks to stop, so the MaxIterations cap below is what the test expects to end the run. + var evaluator = While(static _ => true); + var options = new LoopAgentOptions { MaxIterations = 3 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.Equal(["working", "working", "working"], response.Messages.Select(static m => m.Text)); + } + + /// + /// Verify that a pending tool-approval request terminates the loop and returns that response. + /// + [Fact] + public async Task RunAsync_PendingApprovalRequest_StopsLoopAsync() + { + // Arrange + var approvalRequest = new ToolApprovalRequestContent("req1", new FunctionCallContent("call1", "MyTool")); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, [approvalRequest])])); + // The evaluator always asks to continue; the pending approval request is what the test expects to end the loop. + var evaluator = While(static _ => true); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(1, capture.CallCount); + Assert.Contains(response.Messages.SelectMany(static m => m.Contents), static c => c is ToolApprovalRequestContent); + } + + /// + /// Verify that when no session is supplied the loop creates one and invokes the agent. 
+ /// + [Fact] + public async Task RunAsync_NoSessionSupplied_CreatesSessionAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "done")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(new ValueTask(new ChatClientAgentSession())); + var evaluator = While(static _ => false); + var agent = new LoopAgent(capture.Agent, evaluator); + + // Act (no session supplied by caller) + var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert + Assert.Equal("done", response.Text); + capture.Mock.Protected().Verify("CreateSessionCoreAsync", Times.Once(), ItExpr.IsAny()); + } + + #endregion + + #region RunAsync - feedback log + + /// + /// Verify that in the default (non-fresh) mode the latest feedback is injected verbatim as the next input. + /// + [Fact] + public async Task RunAsync_NonFresh_InjectsLatestFeedbackVerbatimAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("fix it"))); + var options = new LoopAgentOptions { MaxIterations = 2 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal("fix it", capture.MessagesPerCall[1].Single().Text); + } + + /// + /// Verify that when the latest iteration produces no feedback, no stale earlier feedback is re-injected (non-fresh). + /// + [Fact] + public async Task RunAsync_NonFresh_LatestEmpty_DoesNotReinjectStaleFeedbackAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + + // Provide feedback only on the first iteration; the second records nothing. 
+ var evaluator = new DelegateLoopEvaluator((ctx, _) => + new ValueTask(LoopEvaluation.Continue(ctx.Iteration == 1 ? "feedback 1" : null))); + var options = new LoopAgentOptions { MaxIterations = 3 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.Equal("feedback 1", capture.MessagesPerCall[1].Single().Text); + Assert.Empty(capture.MessagesPerCall[2]); + } + + /// + /// Verify that the accumulated feedback log is exposed read-only and shared across all evaluators in a run. + /// + [Fact] + public async Task RunAsync_FeedbackLog_IsSharedAcrossEvaluatorsAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + var observed = new List(); + var producer = new DelegateLoopEvaluator((ctx, _) => + new ValueTask( + ctx.Iteration < 3 ? LoopEvaluation.Continue($"fb {ctx.Iteration}") : LoopEvaluation.Stop())); + var observer = new DelegateLoopEvaluator((ctx, _) => + { + // The observer runs only when the producer stops; it sees the full feedback log. + observed.Add(ctx.Feedback.Count); + return new ValueTask(LoopEvaluation.Stop()); + }); + var options = new LoopAgentOptions { MaxIterations = 5 }; + var agent = new LoopAgent(capture.Agent, new LoopEvaluator[] { producer, observer }, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(3, capture.CallCount); + // On the third iteration the producer stops, the observer runs and sees two recorded feedback entries. + Assert.Equal([2], observed); + } + + /// + /// Verify that iterations driven by <see cref="LoopEvaluation.ContinueWithMessages"/> still record an (aligned) + /// entry in the feedback log, so the log stays one-entry-per-re-invoked-iteration. 
The explicit-messages iteration + /// contributes a <see langword="null"/> entry since it carries no feedback string. + /// + [Fact] + public async Task RunAsync_ContinueWithMessages_RecordsNullFeedbackEntryAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + List? finalLog = null; + var evaluator = new DelegateLoopEvaluator((ctx, _) => + { + // Capture the log on the final evaluation, after both re-invocations have been recorded. + if (ctx.Iteration >= 3) + { + finalLog = ctx.Feedback.ToList(); + return new ValueTask(LoopEvaluation.Stop()); + } + + // Iteration 1 drives a feedback-string re-invocation; iteration 2 drives an explicit-messages one. + return new ValueTask(ctx.Iteration == 1 + ? LoopEvaluation.Continue("needs work") + : LoopEvaluation.ContinueWithMessages([new ChatMessage(ChatRole.User, "explicit")])); + }); + var options = new LoopAgentOptions { MaxIterations = 5 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.NotNull(finalLog); + // One entry per re-invoked iteration: the feedback string, then null for the ContinueWithMessages iteration. + Assert.Equal(["needs work", null], finalLog!); + } + + #endregion + + #region RunAsync - fresh context + + /// + /// Verify that without fresh context the loop reuses a single session across all iterations. 
+ /// + [Fact] + public async Task RunAsync_NonFresh_ReusesSameSessionAcrossIterationsAsync() + { + // Arrange + var loopSession = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(new ValueTask(loopSession)); + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var options = new LoopAgentOptions { MaxIterations = 3 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert: every invocation receives the single session instance the mock returned. + Assert.Equal(3, capture.CallCount); + Assert.Same(loopSession, capture.SessionsPerCall[0]); + Assert.Same(loopSession, capture.SessionsPerCall[1]); + Assert.Same(loopSession, capture.SessionsPerCall[2]); + } + + /// + /// Verify that with fresh context each iteration is rebuilt from the original messages plus the aggregated feedback log. 
+ /// + [Fact] + public async Task RunAsync_Fresh_RebuildsFromInitialMessagesAndAggregatedFeedbackAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(() => new ValueTask(new ChatClientAgentSession())); + var evaluator = new DelegateLoopEvaluator((ctx, _) => new ValueTask(LoopEvaluation.Continue($"fb {ctx.Iteration}"))); + var options = new LoopAgentOptions { MaxIterations = 3, FreshContextPerIteration = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "original task")]); + + // Assert + Assert.Equal(3, capture.CallCount); + var secondCall = capture.MessagesPerCall[1]; + Assert.Contains(secondCall, static m => m.Text == "original task"); + Assert.Contains(secondCall, static m => m.Text.Contains("## Feedback") && m.Text.Contains("fb 1")); + // The third call must carry the feedback recorded for both earlier iterations in a single message. + var thirdCall = capture.MessagesPerCall[2]; + Assert.Contains(thirdCall, static m => m.Text == "original task"); + Assert.Contains(thirdCall, static m => m.Text.Contains("fb 1") && m.Text.Contains("fb 2")); + } + + /// + /// Verify that with fresh context and a loop-owned session, a new session is created for each iteration. 
+ /// + [Fact] + public async Task RunAsync_Fresh_RecreatesSessionEachIterationAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + // The factory lambda returns a new session on every call, so distinct instances per call show a session was requested per iteration. + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(() => new ValueTask(new ChatClientAgentSession())); + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var options = new LoopAgentOptions { MaxIterations = 3, FreshContextPerIteration = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.NotSame(capture.SessionsPerCall[0], capture.SessionsPerCall[1]); + Assert.NotSame(capture.SessionsPerCall[1], capture.SessionsPerCall[2]); + } + + /// + /// Verify that with fresh context and a caller-supplied session, the caller's session is used for the first + /// iteration, then each re-invocation runs against a fresh clone restored from a snapshot taken at the start of + /// the run. The session is serialized once and deserialized once per re-invocation. 
+ /// + [Fact] + public async Task RunAsync_Fresh_WithCallerSession_ClonesFromSerializedSnapshotAsync() + { + // Arrange + var callerSession = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + // An empty JSON object stands in for the serialized session; the mocks below accept any argument and return fixed values. + using var snapshotDoc = JsonDocument.Parse("{}"); + JsonElement snapshot = snapshotDoc.RootElement; + + int serializeCount = 0; + capture.Mock + .Protected() + .Setup>("SerializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => { serializeCount++; return new ValueTask(snapshot); }); + + int deserializeCount = 0; + capture.Mock + .Protected() + .Setup>("DeserializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => { deserializeCount++; return new ValueTask(new ChatClientAgentSession()); }); + + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var options = new LoopAgentOptions { MaxIterations = 3, FreshContextPerIteration = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], callerSession); + + // Assert + Assert.Equal(3, capture.CallCount); + + // The pristine session is snapshotted exactly once, before the first iteration mutates it. + Assert.Equal(1, serializeCount); + + // Re-invocations (iterations 2 and 3) each restore a fresh clone from the snapshot. + Assert.Equal(2, deserializeCount); + + // The first iteration runs against the caller's supplied session; later iterations use distinct clones. + Assert.Same(callerSession, capture.SessionsPerCall[0]); + Assert.NotSame(callerSession, capture.SessionsPerCall[1]); + Assert.NotSame(callerSession, capture.SessionsPerCall[2]); + Assert.NotSame(capture.SessionsPerCall[1], capture.SessionsPerCall[2]); + + // The loop never creates a new session for a caller-supplied one; it clones instead. 
+ capture.Mock.Protected().Verify("CreateSessionCoreAsync", Times.Never(), ItExpr.IsAny()); + } + + /// + /// Verify that with fresh context and a loop-owned session, the session is reset for each iteration even when the + /// evaluator drives re-invocation via <see cref="LoopEvaluation.ContinueWithMessages"/>: the explicit messages are + /// still sent verbatim, but each iteration runs against a new session. + /// + [Fact] + public async Task RunAsync_Fresh_WithContinueWithMessages_RecreatesSessionAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(() => new ValueTask(new ChatClientAgentSession())); + var evaluator = new DelegateLoopEvaluator((_, _) => + new ValueTask(LoopEvaluation.ContinueWithMessages([new ChatMessage(ChatRole.User, "explicit")]))); + var options = new LoopAgentOptions { MaxIterations = 3, FreshContextPerIteration = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert + Assert.Equal(3, capture.CallCount); + + // The explicit messages are sent verbatim on each re-invocation. + Assert.Equal(["explicit"], capture.MessagesPerCall[1].Select(static m => m.Text)); + Assert.Equal(["explicit"], capture.MessagesPerCall[2].Select(static m => m.Text)); + + // The session is still reset for each iteration despite using ContinueWithMessages. + Assert.NotSame(capture.SessionsPerCall[0], capture.SessionsPerCall[1]); + Assert.NotSame(capture.SessionsPerCall[1], capture.SessionsPerCall[2]); + } + + /// + /// Verify that with fresh context and a caller-supplied session, the session is cloned from the start-of-run + /// snapshot for each re-invocation even when the evaluator drives re-invocation via + /// <see cref="LoopEvaluation.ContinueWithMessages"/>. 
+ /// + [Fact] + public async Task RunAsync_Fresh_WithCallerSession_AndContinueWithMessages_ClonesFromSnapshotAsync() + { + // Arrange + var callerSession = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + using var snapshotDoc = JsonDocument.Parse("{}"); + JsonElement snapshot = snapshotDoc.RootElement; + + int serializeCount = 0; + capture.Mock + .Protected() + .Setup>("SerializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => { serializeCount++; return new ValueTask(snapshot); }); + + int deserializeCount = 0; + capture.Mock + .Protected() + .Setup>("DeserializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => { deserializeCount++; return new ValueTask(new ChatClientAgentSession()); }); + + var evaluator = new DelegateLoopEvaluator((_, _) => + new ValueTask(LoopEvaluation.ContinueWithMessages([new ChatMessage(ChatRole.User, "explicit")]))); + var options = new LoopAgentOptions { MaxIterations = 3, FreshContextPerIteration = true }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], callerSession); + + // Assert: one snapshot is taken and one clone is restored per re-invocation (iterations 2 and 3). + Assert.Equal(3, capture.CallCount); + Assert.Equal(1, serializeCount); + Assert.Equal(2, deserializeCount); + + // First iteration uses the caller session; later iterations use distinct clones from the snapshot. + Assert.Same(callerSession, capture.SessionsPerCall[0]); + Assert.NotSame(callerSession, capture.SessionsPerCall[1]); + Assert.NotSame(capture.SessionsPerCall[1], capture.SessionsPerCall[2]); + capture.Mock.Protected().Verify("CreateSessionCoreAsync", Times.Never(), ItExpr.IsAny()); + } + + /// + /// Verify that the configured <see cref="LoopAgentOptions.SessionCreatedCallback"/> is invoked with the loop-owned + /// session the loop creates when the caller does not supply one, even without fresh context. 
+ /// + [Fact] + public async Task RunAsync_SessionCreatedCallback_NotifiesLoopOwnedSessionAsync() + { + // Arrange + var created = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(() => new ValueTask(created)); + var observed = new List(); + var options = new LoopAgentOptions + { + SessionCreatedCallback = (s, _) => { observed.Add(s); return default; }, + }; + var agent = new LoopAgent(capture.Agent, While(static _ => false), options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert + Assert.Equal(1, capture.CallCount); + Assert.Same(created, Assert.Single(observed)); + Assert.Same(created, capture.SessionsPerCall[0]); + } + + /// + /// Verify that the <see cref="LoopAgentOptions.SessionCreatedCallback"/> is not invoked when the caller supplies a + /// session and no fresh context is requested (no new session is created). + /// + [Fact] + public async Task RunAsync_SessionCreatedCallback_NotInvokedForCallerSessionAsync() + { + // Arrange + var callerSession = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + var observed = new List(); + var options = new LoopAgentOptions + { + MaxIterations = 3, + SessionCreatedCallback = (s, _) => { observed.Add(s); return default; }, + }; + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], callerSession); + + // Assert + Assert.Equal(3, capture.CallCount); + Assert.Empty(observed); + } + + /// + /// Verify that with fresh context and a loop-owned session, the <see cref="LoopAgentOptions.SessionCreatedCallback"/> + /// is invoked for the initial session and for each session created for a re-invocation, in order. 
+ /// + [Fact] + public async Task RunAsync_Fresh_SessionCreatedCallback_NotifiesEachCreatedSessionAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + capture.Mock + .Protected() + .Setup>("CreateSessionCoreAsync", ItExpr.IsAny()) + .Returns(() => new ValueTask(new ChatClientAgentSession())); + var observed = new List(); + var options = new LoopAgentOptions + { + MaxIterations = 3, + FreshContextPerIteration = true, + SessionCreatedCallback = (s, _) => { observed.Add(s); return default; }, + }; + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act (no session supplied by caller) + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")]); + + // Assert: one notification for the initial session plus one per re-invocation (iterations 2 and 3). + Assert.Equal(3, capture.CallCount); + Assert.Equal(3, observed.Count); + // Comparing the two sequences also checks that the sessions were reported in the order they were used. + Assert.Equal(capture.SessionsPerCall, observed); + } + + /// + /// Verify that with fresh context and a caller-supplied session, the + /// <see cref="LoopAgentOptions.SessionCreatedCallback"/> is invoked only for the cloned sessions created for + /// re-invocations, not for the caller's own session. 
+ /// + [Fact] + public async Task RunAsync_Fresh_WithCallerSession_SessionCreatedCallback_NotifiesClonesOnlyAsync() + { + // Arrange + var callerSession = new ChatClientAgentSession(); + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "x")])); + using var snapshotDoc = JsonDocument.Parse("{}"); + JsonElement snapshot = snapshotDoc.RootElement; + capture.Mock + .Protected() + .Setup>("SerializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => new ValueTask(snapshot)); + capture.Mock + .Protected() + .Setup>("DeserializeSessionCoreAsync", ItExpr.IsAny(), ItExpr.IsAny(), ItExpr.IsAny()) + .Returns(() => new ValueTask(new ChatClientAgentSession())); + var observed = new List(); + var options = new LoopAgentOptions + { + MaxIterations = 3, + FreshContextPerIteration = true, + SessionCreatedCallback = (s, _) => { observed.Add(s); return default; }, + }; + var evaluator = new DelegateLoopEvaluator((_, _) => new ValueTask(LoopEvaluation.Continue("more"))); + var agent = new LoopAgent(capture.Agent, evaluator, options); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], callerSession); + + // Assert: the caller session is never reported; only the two clones used for re-invocations are. + Assert.Equal(3, capture.CallCount); + Assert.DoesNotContain(callerSession, observed); + Assert.Equal([capture.SessionsPerCall[1]!, capture.SessionsPerCall[2]!], observed); + } + + #endregion + + #region RunAsync - multiple evaluators + + /// <summary> + /// Verify that when several evaluators are configured, the first one that requests re-invocation supplies the + /// feedback and the evaluators after it are not evaluated for that iteration. + /// </summary> + [Fact] + public async Task RunAsync_MultipleEvaluators_FirstReinvokeWinsAndShortCircuitsAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")])); + + var firstEvaluated = 0; + var secondEvaluated = 0; + var first = new DelegateLoopEvaluator((ctx, _) => + { + firstEvaluated++; + return new ValueTask( + ctx.Iteration < 2 ? 
LoopEvaluation.Continue("from first") : LoopEvaluation.Stop()); + }); + var second = new DelegateLoopEvaluator((_, _) => + { + secondEvaluated++; + return new ValueTask(LoopEvaluation.Stop()); + }); + var agent = new LoopAgent(capture.Agent, new LoopEvaluator[] { first, second }); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal("from first", capture.MessagesPerCall[1].Single().Text); + Assert.Equal(2, firstEvaluated); + // The second evaluator is only evaluated on the iteration where the first one stops. + Assert.Equal(1, secondEvaluated); + } + + /// + /// Verify that a later evaluator can cause re-invocation when an earlier evaluator asks to stop, confirming that + /// <see cref="LoopEvaluation.Stop"/> is not a veto. + /// + [Fact] + public async Task RunAsync_MultipleEvaluators_LaterEvaluatorCanContinueAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")])); + var alwaysStop = While(static _ => false); + var continueOnce = new DelegateLoopEvaluator((ctx, _) => + new ValueTask( + ctx.Iteration < 2 ? LoopEvaluation.Continue("from second") : LoopEvaluation.Stop())); + var agent = new LoopAgent(capture.Agent, new LoopEvaluator[] { alwaysStop, continueOnce }); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal("from second", capture.MessagesPerCall[1].Single().Text); + } + + /// + /// Verify that the loop stops when every evaluator asks to stop. 
+ /// + [Fact] + public async Task RunAsync_MultipleEvaluators_AllStop_StopsAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "done")])); + var first = While(static _ => false); + var second = While(static _ => false); + var agent = new LoopAgent(capture.Agent, new LoopEvaluator[] { first, second }); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(1, capture.CallCount); + } + + #endregion + + #region RunAsync - AIJudge evaluator integration + + /// + /// Verify that an <see cref="AIJudgeLoopEvaluator"/> (non-fresh) injects its templated feedback message verbatim + /// on re-invocation. + /// + [Fact] + public async Task RunAsync_WithAIJudgeEvaluator_NonFresh_InjectsTemplatedFeedbackMessageAsync() + { + // Arrange + var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial")])); + var judgeClient = CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"the cost estimate is missing\"}"); + var evaluator = new AIJudgeLoopEvaluator(judgeClient); + var options = new LoopAgentOptions { MaxIterations = 2 }; + var agent = new LoopAgent(capture.Agent, evaluator, options); + // Build the expected text from the evaluator's own default template so the assertion does not hard-code its wording. + string expected = AIJudgeLoopEvaluator.DefaultFeedbackMessageTemplate + .Replace(AIJudgeLoopEvaluator.GapAnalysisPlaceholder, "the cost estimate is missing"); + + // Act + await agent.RunAsync([new ChatMessage(ChatRole.User, "question")], new ChatClientAgentSession()); + + // Assert + Assert.Equal(2, capture.CallCount); + Assert.Equal(expected, capture.MessagesPerCall[1].Single().Text); + } + + #endregion + + #region RunAsync - response shaping + + /// + /// Verify that a non-streaming run aggregates each iteration's on-behalf-of feedback message and response messages + /// in order, stamping the configured author name on the synthesized feedback while never echoing caller input. 
+    /// </summary>
+    [Fact]
+    public async Task RunAsync_Aggregates_OnBehalfOfFeedbackAndResponsesAsync()
+    {
+        // Arrange
+        var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")]));
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { OnBehalfOfAuthorName = "loop" };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession());
+
+        // Assert
+        Assert.Equal(["ack", "fix it", "ack"], response.Messages.Select(static m => m.Text));
+        ChatMessage feedbackMessage = response.Messages[1];
+        Assert.Equal(ChatRole.User, feedbackMessage.Role);
+        Assert.Equal("loop", feedbackMessage.AuthorName);
+
+        // The on-behalf-of author name is also stamped on the message actually sent to the wrapped agent.
+        Assert.Equal("loop", capture.MessagesPerCall[1].Single().AuthorName);
+    }
+
+    /// <summary>
+    /// Verify that evaluator-supplied messages are surfaced verbatim and their author name is not overwritten by the
+    /// loop's on-behalf-of author name.
+    /// </summary>
+    [Fact]
+    public async Task RunAsync_ContinueWithMessages_AreSurfacedWithoutAuthorNameOverrideAsync()
+    {
+        // Arrange
+        var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")]));
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2
+                    ? LoopEvaluation.ContinueWithMessages([new ChatMessage(ChatRole.User, "explicit") { AuthorName = "evaluator" }])
+                    : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { OnBehalfOfAuthorName = "loop" };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession());
+
+        // Assert
+        Assert.Equal(["ack", "explicit", "ack"], response.Messages.Select(static m => m.Text));
+        Assert.Equal("evaluator", response.Messages[1].AuthorName);
+    }
+
+    /// <summary>
+    /// Verify that in fresh-context mode only the synthesized aggregated feedback message is surfaced; the replayed
+    /// caller input messages are not echoed.
+    /// </summary>
+    [Fact]
+    public async Task RunAsync_FreshContext_SurfacesOnlyAggregatedFeedbackAsync()
+    {
+        // Arrange
+        var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")]));
+        capture.Mock
+            .Protected()
+            .Setup<ValueTask<AgentSession>>("CreateSessionCoreAsync", ItExpr.IsAny<CancellationToken>())
+            .Returns(new ValueTask<AgentSession>(new ChatClientAgentSession()));
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { FreshContextPerIteration = true, OnBehalfOfAuthorName = "loop" };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act (no caller session so the loop owns and recreates the session each iteration).
+        var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "original")]);
+
+        // Assert
+        Assert.Equal(3, response.Messages.Count);
+        ChatMessage surfacedFeedback = response.Messages[1];
+        Assert.Equal("loop", surfacedFeedback.AuthorName);
+        Assert.Contains("fix it", surfacedFeedback.Text);
+
+        // The replayed caller input ("original") is sent to the agent but is not surfaced in the response.
+        Assert.DoesNotContain(response.Messages, static m => m.Text == "original");
+        Assert.Equal(["original", surfacedFeedback.Text], capture.MessagesPerCall[1].Select(static m => m.Text));
+    }
+
+    /// <summary>
+    /// Verify that <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/> omits the injected on-behalf-of messages
+    /// from the aggregated non-streaming response while still sending them to the wrapped agent.
+    /// </summary>
+    [Fact]
+    public async Task RunAsync_ExcludeOnBehalfOfMessages_OmitsThemFromResponseAsync()
+    {
+        // Arrange
+        var capture = new InnerAgentCapture(_ => new AgentResponse([new ChatMessage(ChatRole.Assistant, "ack")]));
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { ExcludeOnBehalfOfMessages = true };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var response = await agent.RunAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession());
+
+        // Assert
+        Assert.Equal(["ack", "ack"], response.Messages.Select(static m => m.Text));
+
+        // The feedback is still sent to the wrapped agent even though it is not surfaced.
+        Assert.Equal("fix it", capture.MessagesPerCall[1].Single().Text);
+    }
+
+    #endregion
+
+    #region RunStreamingAsync
+
+    /// <summary>
+    /// Verify that streaming surfaces updates from every iteration and stops when the evaluator stops.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_MultipleIterations_StreamsAllUpdatesAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(call =>
+            [new AgentResponseUpdate(ChatRole.Assistant, $"chunk {call}")]);
+        var evaluator = While(ctx => ctx.Iteration < 3);
+        var agent = new LoopAgent(capture.Agent, evaluator);
+
+        // Act
+        var texts = new List<string>();
+        await foreach (var update in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()))
+        {
+            texts.Add(update.Text);
+        }
+
+        // Assert
+        Assert.Equal(3, capture.CallCount);
+        Assert.Equal(["chunk 1", "chunk 2", "chunk 3"], texts);
+    }
+
+    /// <summary>
+    /// Verify that the streaming path enforces the global safety cap like the non-streaming path.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_AlwaysContinue_StopsAtGlobalCapAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(call => [new AgentResponseUpdate(ChatRole.Assistant, $"chunk {call}")]);
+        var evaluator = While(static _ => true);
+        var options = new LoopAgentOptions { MaxIterations = 4 };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        await foreach (var _ in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()))
+        {
+        }
+
+        // Assert
+        Assert.Equal(4, capture.CallCount);
+    }
+
+    /// <summary>
+    /// Verify that the streaming path sends the initial messages once and no messages on a feedback-less re-invocation.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_ContinueWithoutFeedback_SendsInitialOnceThenNoneAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(_ => [new AgentResponseUpdate(ChatRole.Assistant, "ack")]);
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue() : LoopEvaluation.Stop()));
+        var agent = new LoopAgent(capture.Agent, evaluator);
+
+        // Act
+        await foreach (var _ in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()))
+        {
+        }
+
+        // Assert
+        Assert.Equal(2, capture.CallCount);
+        Assert.Equal("original", capture.MessagesPerCall[0].Single().Text);
+        Assert.Empty(capture.MessagesPerCall[1]);
+    }
+
+    /// <summary>
+    /// Verify that the streaming path stops after the iteration that produces a pending approval request.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_PendingApprovalRequest_StopsLoopAsync()
+    {
+        // Arrange
+        var approvalRequest = new ToolApprovalRequestContent("req1", new FunctionCallContent("call1", "MyTool"));
+        var capture = new InnerStreamingCapture(_ => [new AgentResponseUpdate(ChatRole.Assistant, [approvalRequest])]);
+        var evaluator = While(static _ => true);
+        var agent = new LoopAgent(capture.Agent, evaluator);
+
+        // Act
+        await foreach (var _ in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "go")], new ChatClientAgentSession()))
+        {
+        }
+
+        // Assert
+        Assert.Equal(1, capture.CallCount);
+    }
+
+    /// <summary>
+    /// Verify that the streaming path emits the loop's on-behalf-of feedback as an update (with the configured author
+    /// name) before streaming the re-invocation it drives.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_SurfacesOnBehalfOfFeedbackBeforeReinvocationAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(i =>
+            [new AgentResponseUpdate(ChatRole.Assistant, "ack") { ResponseId = $"resp-{i}", AgentId = $"agent-{i}" }]);
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { OnBehalfOfAuthorName = "loop" };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var updates = new List<AgentResponseUpdate>();
+        await foreach (var update in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()))
+        {
+            updates.Add(update);
+        }
+
+        // Assert
+        Assert.Equal(["ack", "fix it", "ack"], updates.Select(static u => u.Text));
+        AgentResponseUpdate feedbackUpdate = updates[1];
+        Assert.Equal(ChatRole.User, feedbackUpdate.Role);
+        Assert.Equal("loop", feedbackUpdate.AuthorName);
+        // The surfaced on-behalf-of update inherits the re-invocation iteration's ResponseId so downstream mergers
+        // group it with the run it drives, and carries its own unique non-null MessageId. AgentId is left unset
+        // because the message is synthesized by the loop, not produced by the wrapped agent.
+        Assert.Equal("resp-2", feedbackUpdate.ResponseId);
+        Assert.True(string.IsNullOrEmpty(feedbackUpdate.AgentId));
+        Assert.False(string.IsNullOrEmpty(feedbackUpdate.MessageId));
+    }
+
+    /// <summary>
+    /// Verify that <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/> omits the injected on-behalf-of updates
+    /// from the streamed output while still sending the feedback to the wrapped agent.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_ExcludeOnBehalfOfMessages_OmitsThemFromUpdatesAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(_ => [new AgentResponseUpdate(ChatRole.Assistant, "ack")]);
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { ExcludeOnBehalfOfMessages = true };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var texts = new List<string>();
+        await foreach (var update in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()))
+        {
+            texts.Add(update.Text);
+        }
+
+        // Assert
+        Assert.Equal(["ack", "ack"], texts);
+        Assert.Equal("fix it", capture.MessagesPerCall[1].Single().Text);
+    }
+
+    /// <summary>
+    /// Verify that a surfaced on-behalf-of streaming update is assigned a generated, unique <see cref="AgentResponseUpdate.MessageId"/>
+    /// when the underlying evaluator-supplied message has none, inherits the driven iteration's ResponseId, and leaves AgentId unset.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_ContinueWithMessages_GetsGeneratedMessageIdAndInheritsIdsAsync()
+    {
+        // Arrange
+        var capture = new InnerStreamingCapture(i =>
+            [new AgentResponseUpdate(ChatRole.Assistant, "ack") { ResponseId = $"resp-{i}", AgentId = $"agent-{i}" }]);
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2
+                    ? LoopEvaluation.ContinueWithMessages([new ChatMessage(ChatRole.User, "explicit") { AuthorName = "evaluator" }])
+                    : LoopEvaluation.Stop()));
+        var agent = new LoopAgent(capture.Agent, evaluator);
+
+        // Act
+        var updates = new List<AgentResponseUpdate>();
+        await foreach (var update in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()))
+        {
+            updates.Add(update);
+        }
+
+        // Assert
+        Assert.Equal(["ack", "explicit", "ack"], updates.Select(static u => u.Text));
+        AgentResponseUpdate surfaced = updates[1];
+        Assert.Equal("evaluator", surfaced.AuthorName);
+        Assert.False(string.IsNullOrEmpty(surfaced.MessageId));
+        Assert.Equal("resp-2", surfaced.ResponseId);
+        Assert.True(string.IsNullOrEmpty(surfaced.AgentId));
+    }
+
+    /// <summary>
+    /// Verify that when the wrapped agent produces no updates for an iteration, the surfaced on-behalf-of update is
+    /// still assigned a generated (non-null) ResponseId so it can be grouped downstream.
+    /// </summary>
+    [Fact]
+    public async Task RunStreamingAsync_NoInnerUpdates_GeneratesResponseIdForOnBehalfOfAsync()
+    {
+        // Arrange (the re-invocation iteration produces no updates, so its surfaced feedback has no inner ResponseId
+        // to inherit and must fall back to a generated one).
+        var capture = new InnerStreamingCapture(i =>
+            i < 2 ? [new AgentResponseUpdate(ChatRole.Assistant, "ack")] : []);
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.Iteration < 2 ? LoopEvaluation.Continue("fix it") : LoopEvaluation.Stop()));
+        var options = new LoopAgentOptions { OnBehalfOfAuthorName = "loop" };
+        var agent = new LoopAgent(capture.Agent, evaluator, options);
+
+        // Act
+        var updates = new List<AgentResponseUpdate>();
+        await foreach (var update in agent.RunStreamingAsync([new ChatMessage(ChatRole.User, "original")], new ChatClientAgentSession()))
+        {
+            updates.Add(update);
+        }
+
+        // Assert (the first iteration's "ack" and then the surfaced feedback whose iteration produced no updates).
+        Assert.Equal(["ack", "fix it"], updates.Select(static u => u.Text));
+        AgentResponseUpdate feedbackUpdate = updates[1];
+        Assert.Equal("loop", feedbackUpdate.AuthorName);
+        Assert.False(string.IsNullOrEmpty(feedbackUpdate.ResponseId));
+        Assert.True(string.IsNullOrEmpty(feedbackUpdate.AgentId));
+        Assert.False(string.IsNullOrEmpty(feedbackUpdate.MessageId));
+    }
+
+    #endregion
+}
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopContextTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopContextTests.cs
new file mode 100644
index 0000000000..0047c5d4fd
--- /dev/null
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopContextTests.cs
@@ -0,0 +1,146 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Moq;
+
+namespace Microsoft.Agents.AI.UnitTests;
+
+/// <summary>
+/// Unit tests for the <see cref="LoopContext"/> class, including its public constructor used to test custom evaluators.
+/// </summary>
+public class LoopContextTests
+{
+    /// <summary>
+    /// Verify that the constructor throws when the agent is null.
+    /// </summary>
+    [Fact]
+    public void Constructor_NullAgent_Throws()
+    {
+        // Act & Assert
+        Assert.Throws<ArgumentNullException>("agent", () => new LoopContext(
+            null!, new ChatClientAgentSession(), [], CreateResponse()));
+    }
+
+    /// <summary>
+    /// Verify that the constructor throws when the session is null.
+    /// </summary>
+    [Fact]
+    public void Constructor_NullSession_Throws()
+    {
+        // Act & Assert
+        Assert.Throws<ArgumentNullException>("session", () => new LoopContext(
+            new Mock<AIAgent>().Object, null!, [], CreateResponse()));
+    }
+
+    /// <summary>
+    /// Verify that the constructor throws when the initial messages are null.
+    /// </summary>
+    [Fact]
+    public void Constructor_NullInitialMessages_Throws()
+    {
+        // Act & Assert
+        Assert.Throws<ArgumentNullException>("initialMessages", () => new LoopContext(
+            new Mock<AIAgent>().Object, new ChatClientAgentSession(), null!, CreateResponse()));
+    }
+
+    /// <summary>
+    /// Verify that the constructor throws when the last response is null.
+    /// </summary>
+    [Fact]
+    public void Constructor_NullLastResponse_Throws()
+    {
+        // Act & Assert
+        Assert.Throws<ArgumentNullException>("lastResponse", () => new LoopContext(
+            new Mock<AIAgent>().Object, new ChatClientAgentSession(), [], null!));
+    }
+
+    /// <summary>
+    /// Verify that the constructor populates the properties and that LastResponse is never null.
+    /// </summary>
+    [Fact]
+    public void Constructor_ValidArguments_SetsProperties()
+    {
+        // Arrange
+        var agent = new Mock<AIAgent>().Object;
+        var session = new ChatClientAgentSession();
+        ChatMessage[] initialMessages = [new ChatMessage(ChatRole.User, "go")];
+        var response = CreateResponse("done");
+
+        // Act
+        var context = new LoopContext(agent, session, initialMessages, response);
+
+        // Assert
+        Assert.Same(agent, context.Agent);
+        Assert.Same(session, context.Session);
+        Assert.Same(initialMessages, context.InitialMessages);
+        Assert.Same(response, context.LastResponse);
+        Assert.Null(context.RunOptions);
+        Assert.NotNull(context.AdditionalProperties);
+        Assert.Equal(0, context.Iteration);
+        Assert.Empty(context.Feedback);
+    }
+
+    /// <summary>
+    /// Verify that the session can be replaced through the internal setter (used by the loop for fresh contexts).
+    /// </summary>
+    [Fact]
+    public void Session_IsInternallySettable()
+    {
+        // Arrange
+        var context = new LoopContext(
+            new Mock<AIAgent>().Object, new ChatClientAgentSession(), [], CreateResponse());
+        var newSession = new ChatClientAgentSession();
+
+        // Act
+        context.Session = newSession;
+
+        // Assert
+        Assert.Same(newSession, context.Session);
+    }
+
+    /// <summary>
+    /// Verify that <see cref="LoopContext.Feedback"/> can be assigned through its internal setter.
+    /// </summary>
+    [Fact]
+    public void Feedback_IsInternallySettable()
+    {
+        // Arrange
+        var context = new LoopContext(
+            new Mock<AIAgent>().Object, new ChatClientAgentSession(), [], CreateResponse());
+
+        // Act
+        context.Feedback = ["first", null];
+
+        // Assert
+        Assert.Equal(["first", null], context.Feedback);
+    }
+
+    /// <summary>
+    /// Verify that an evaluator can be evaluated against a publicly-constructed context (the scenario the public
+    /// constructor exists to support).
+    /// </summary>
+    [Fact]
+    public async Task PubliclyConstructedContext_CanEvaluateEvaluatorAsync()
+    {
+        // Arrange
+        var context = new LoopContext(
+            new Mock<AIAgent>().Object,
+            new ChatClientAgentSession(),
+            [new ChatMessage(ChatRole.User, "go")],
+            CreateResponse("done"));
+        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
+            new ValueTask<LoopEvaluation>(
+                ctx.LastResponse.Text == "done" ? LoopEvaluation.Stop() : LoopEvaluation.Continue()));
+
+        // Act
+        LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
+
+        // Assert
+        Assert.False(evaluation.ShouldReinvoke);
+    }
+
+    private static AgentResponse CreateResponse(string text = "response") =>
+        new([new ChatMessage(ChatRole.Assistant, text)]);
+}
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopEvaluationTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopEvaluationTests.cs
new file mode 100644
index 0000000000..c6545deeba
--- /dev/null
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopEvaluationTests.cs
@@ -0,0 +1,55 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.Agents.AI.UnitTests;
+
+/// <summary>
+/// Unit tests for the <see cref="LoopEvaluation"/> class.
+/// </summary>
+public class LoopEvaluationTests
+{
+    /// <summary>
+    /// Verify that Stop produces an evaluation that does not re-invoke and carries no feedback.
+    /// </summary>
+    [Fact]
+    public void Stop_DoesNotReinvoke_AndHasNoFeedback()
+    {
+        // Act
+        var evaluation = LoopEvaluation.Stop();
+
+        // Assert
+        Assert.False(evaluation.ShouldReinvoke);
+        Assert.Null(evaluation.Feedback);
+    }
+
+    /// <summary>
+    /// Verify that Continue with no argument re-invokes and carries no feedback.
+    /// </summary>
+    [Fact]
+    public void Continue_NoFeedback_ReinvokesWithNullFeedback()
+    {
+        // Act
+        var evaluation = LoopEvaluation.Continue();
+
+        // Assert
+        Assert.True(evaluation.ShouldReinvoke);
+        Assert.Null(evaluation.Feedback);
+    }
+
+    /// <summary>
+    /// Verify that Continue with empty or whitespace-only feedback normalizes the feedback to null, matching the documented
+    /// "null, empty, or whitespace is treated as no feedback" semantics.
+    /// </summary>
+    [Theory]
+    [InlineData("")]
+    [InlineData(" ")]
+    [InlineData("\t\n")]
+    public void Continue_WhitespaceFeedback_NormalizesToNull(string feedback)
+    {
+        // Act
+        var evaluation = LoopEvaluation.Continue(feedback);
+
+        // Assert
+        Assert.True(evaluation.ShouldReinvoke);
+        Assert.Null(evaluation.Feedback);
+    }
+}
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopTestHelpers.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopTestHelpers.cs
new file mode 100644
index 0000000000..98c9dd023f
--- /dev/null
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Harness/Loop/LoopTestHelpers.cs
@@ -0,0 +1,141 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Moq;
+using Moq.Protected;
+
+namespace Microsoft.Agents.AI.UnitTests;
+
+/// <summary>
+/// Shared helpers used by the LoopAgent and LoopEvaluator unit tests.
+/// </summary>
+internal static class LoopTestHelpers
+{
+    /// <summary>
+    /// Creates a <see cref="DelegateLoopEvaluator"/> that re-invokes the agent (without feedback) while the
+    /// supplied predicate returns <see langword="true"/>.
+    /// </summary>
+    public static DelegateLoopEvaluator While(Func<LoopContext, bool> shouldReinvoke) =>
+        new((context, _) =>
+            new ValueTask<LoopEvaluation>(
+                shouldReinvoke(context) ? LoopEvaluation.Continue() : LoopEvaluation.Stop()));
+
+    /// <summary>
+    /// Creates a mocked judge <see cref="IChatClient"/> that always returns the supplied response text.
+    /// </summary>
+    public static IChatClient CreateJudgeClient(string responseText)
+    {
+        var mock = new Mock<IChatClient>();
+        mock.Setup(c => c.GetResponseAsync(
+                It.IsAny<IEnumerable<ChatMessage>>(),
+                It.IsAny<ChatOptions?>(),
+                It.IsAny<CancellationToken>()))
+            .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, responseText)));
+        return mock.Object;
+    }
+
+    /// <summary>
+    /// Creates a mocked judge <see cref="IChatClient"/> that always returns the supplied response text and captures the
+    /// messages it was invoked with via <paramref name="capturedMessages"/>.
+    /// </summary>
+    public static IChatClient CreateCapturingJudgeClient(string responseText, out List<ChatMessage> capturedMessages)
+    {
+        var captured = new List<ChatMessage>();
+        capturedMessages = captured;
+        var mock = new Mock<IChatClient>();
+        mock.Setup(c => c.GetResponseAsync(
+                It.IsAny<IEnumerable<ChatMessage>>(),
+                It.IsAny<ChatOptions?>(),
+                It.IsAny<CancellationToken>()))
+            .Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((messages, _, _) =>
+            {
+                captured.Clear();
+                captured.AddRange(messages);
+            })
+            .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, responseText)));
+        return mock.Object;
+    }
+
+    public static async IAsyncEnumerable<T> ToAsyncEnumerableAsync<T>(
+        IEnumerable<T> items,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        foreach (var item in items)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+            yield return item;
+            await Task.Yield();
+        }
+    }
+}
+
+/// <summary>
+/// Captures the messages sent to a mocked non-streaming inner agent and produces responses by call index.
+/// </summary>
+internal sealed class InnerAgentCapture
+{
+    public InnerAgentCapture(Func<int, AgentResponse> responseFactory)
+    {
+        this.Mock
+            .Protected()
+            .Setup<Task<AgentResponse>>("RunCoreAsync",
+                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
+                ItExpr.IsAny<AgentSession?>(),
+                ItExpr.IsAny<AgentRunOptions?>(),
+                ItExpr.IsAny<CancellationToken>())
+            .Callback<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>((msgs, session, _, _) =>
+            {
+                this.CallCount++;
+                this.MessagesPerCall.Add(msgs.ToList());
+                this.SessionsPerCall.Add(session);
+            })
+            .ReturnsAsync(() => responseFactory(this.CallCount));
+    }
+
+    public Mock<AIAgent> Mock { get; } = new();
+
+    public AIAgent Agent => this.Mock.Object;
+
+    public int CallCount { get; private set; }
+
+    public List<List<ChatMessage>> MessagesPerCall { get; } = [];
+
+    public List<AgentSession?> SessionsPerCall { get; } = [];
+}
+
+/// <summary>
+/// Captures the messages sent to a mocked streaming inner agent and produces updates by call index.
+/// </summary>
+internal sealed class InnerStreamingCapture
+{
+    public InnerStreamingCapture(Func<int, AgentResponseUpdate[]> updatesFactory)
+    {
+        this.Mock
+            .Protected()
+            .Setup<IAsyncEnumerable<AgentResponseUpdate>>("RunCoreStreamingAsync",
+                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
+                ItExpr.IsAny<AgentSession?>(),
+                ItExpr.IsAny<AgentRunOptions?>(),
+                ItExpr.IsAny<CancellationToken>())
+            .Returns<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>((msgs, _, _, ct) =>
+            {
+                this.CallCount++;
+                this.MessagesPerCall.Add(msgs.ToList());
+                return LoopTestHelpers.ToAsyncEnumerableAsync(updatesFactory(this.CallCount), ct);
+            });
+    }
+
+    public Mock<AIAgent> Mock { get; } = new();
+
+    public AIAgent Agent => this.Mock.Object;
+
+    public int CallCount { get; private set; }
+
+    public List<List<ChatMessage>> MessagesPerCall { get; } = [];
+}