.NET: Add LoopAgent capability for Harnesses (#6384)

* Add LoopAgent capability for Harnesses

* Address PR comments.

* Add support for returning user messages and response aggregation

* Support fresh context per iteration with input sessions via cloning

* Add ability to receive newly created sessions via callback

* Address PR comments

* Add judge criteria

* Address PR comments
This commit is contained in:
westey
2026-06-11 16:00:01 +01:00
committed by GitHub
Unverified
parent 8e1998ddcb
commit 12ce099165
24 changed files with 3823 additions and 0 deletions
+1
View File
@@ -129,6 +129,7 @@
<Project Path="samples/02-agents/Harness/Harness_Step02_Research_WithBackgroundAgents/Harness_Step02_Research_WithBackgroundAgents.csproj" />
<Project Path="samples/02-agents/Harness/Harness_Step03_DataProcessing/Harness_Step03_DataProcessing.csproj" />
<Project Path="samples/02-agents/Harness/Harness_Step04_CodeExecution/Harness_Step04_CodeExecution.csproj" />
<Project Path="samples/02-agents/Harness/Harness_Step05_Loop/Harness_Step05_Loop.csproj" />
</Folder>
<Folder Name="/Samples/02-agents/AGUI/Step05_StateManagement/">
<Project Path="samples/02-agents/AGUI/Step05_StateManagement/Client/Client.csproj" />
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net10.0</TargetFrameworks>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Azure.AI.Projects" />
<PackageReference Include="Azure.Identity" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.Foundry\Microsoft.Agents.AI.Foundry.csproj" />
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.Harness\Microsoft.Agents.AI.Harness.csproj" />
</ItemGroup>
</Project>
@@ -0,0 +1,272 @@
// Copyright (c) Microsoft. All rights reserved.
// This sample demonstrates how to wrap a HarnessAgent with the LoopAgent decorator to re-invoke
// the agent until a configured LoopEvaluator decides to stop. It covers the common looping patterns
// through one decorator, each driven by a different evaluator:
//
// 1. Completion-marker (Ralph-style) loop — keep refining until the agent emits a completion
// marker, restarting each pass from a fresh context (CompletionMarkerLoopEvaluator +
// FreshContextPerIteration).
// 2. Delegate predicate (todos remaining) — loop while the built-in TodoProvider still has open
// items (DelegateLoopEvaluator).
// 3. AI judge — a second chat client decides whether the original request was answered, and the
// loop continues while the answer is "no" (AIJudgeLoopEvaluator).
// 4. Approval heuristics + loop — combine the LoopAgent with the ToolApprovalAgent auto-approval
// heuristics so a looped agent auto-approves tool calls instead of stalling on approval.
//
// The demos run sequentially and print each loop's final response.
#pragma warning disable OPENAI001 // Suppress experimental API warnings for Responses API usage.
#pragma warning disable MAAI001 // Suppress experimental API warnings for Agents AI experiments.
using System.ClientModel.Primitives;
using System.ComponentModel;
using Azure.AI.Projects;
using Azure.Identity;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
// Read the sample configuration from the environment. The project endpoint is mandatory; the model
// deployment name falls back to a default when it is not set.
string endpoint =
    Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT")
    ?? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set.");
string deploymentName =
    Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME")
    ?? "gpt-5.4";

// Token bounds handed to every HarnessAgent created by this sample (see CreateLeanHarnessAgent). The
// HarnessAgent pre-configures function invocation, per-service-call chat history persistence, and
// context-window compaction; these values size the in-loop compaction window.
const int MaxContextWindowTokens = 1_050_000;
const int MaxOutputTokens = 32_000;

// One Foundry project client is shared by all demos; CreateChatClient() hands out a fresh IChatClient
// over the same Responses endpoint on each call.
var projectUri = new Uri(endpoint);

// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
// In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
// latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
var credential = new DefaultAzureCredential();
var retryOptions = new AIProjectClientOptions { RetryPolicy = new ClientRetryPolicy(3) };
var projectClient = new AIProjectClient(projectUri, credential, retryOptions);

IChatClient CreateChatClient()
{
    return projectClient
        .GetProjectOpenAIClient()
        .GetResponsesClient()
        .AsIChatClient(deploymentName);
}

// Run the four loop demos one after another.
await RalphLoopAsync();
await TodoLoopAsync();
await JudgeLoopAsync();
await ApprovalLoopAsync();
// Pattern 1: a "Ralph"-style loop. The agent keeps refining its answer until it emits the completion
// marker, and every pass starts from a fresh context.
async Task RalphLoopAsync()
{
    // The marker the agent is told to emit, and that the evaluator looks for.
    const string CompletionMarker = "<promise>COMPLETE</promise>";

    Console.WriteLine($"\n=== 1. Completion-marker (Ralph-style) loop — refine until {CompletionMarker} (max 5) ===");

    // Lean HarnessAgent: this iterative-refinement task needs neither the todo nor the mode provider.
    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "ralph",
        instructions:
            """
            You are iteratively refining a product name for a note-taking app. Each turn, build on the
            feedback so far: propose an improved candidate with a short reason. When you are confident the
            name is final, end your message with the exact marker <promise>COMPLETE</promise>.
            """);

    // With FreshContextPerIteration each pass runs on a brand-new session, so the agent has no memory
    // of its previous suggestion. The feedback template therefore echoes the prior candidate back via
    // the {last_response} placeholder and reminds the agent of the completion marker.
    var evaluatorOptions = new CompletionMarkerLoopEvaluatorOptions
    {
        FeedbackMessageTemplate =
            $"Your previous suggestion was:\n{CompletionMarkerLoopEvaluator.LastResponsePlaceholder}" +
            $"\n\nContinue to refine the name and remember to reply with {CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder} when happy.",
    };

    // The evaluator stops the loop once the marker shows up in the response; until then the agent is
    // re-invoked, up to MaxIterations times.
    var evaluator = new CompletionMarkerLoopEvaluator(CompletionMarker, options: evaluatorOptions);
    var loopOptions = new LoopAgentOptions { MaxIterations = 5, FreshContextPerIteration = true };
    AIAgent loopAgent = new LoopAgent(harnessAgent, evaluator, loopOptions);

    AgentResponse response = await StreamLoopAsync(loopAgent, "Suggest a name for a note-taking app.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
// Pattern 2: a delegate predicate that keeps looping while the built-in TodoProvider reports open items.
async Task TodoLoopAsync()
{
    Console.WriteLine("\n=== 2. Delegate predicate — loop while todos remain (max 6) ===");

    // Only the mode provider is disabled here; the TodoProvider stays on so the agent has todo tools
    // for planning and tracking its work.
    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "planner",
        instructions:
            """
            You are a planning assistant. First break the task into todo items using your todo tools.
            Then, on each turn, make progress and mark completed items as done. When all items are
            complete, summarize the result.
            """,
        disableTodoProvider: false);

    // The delegate is self-contained: it resolves the TodoProvider from context.Agent via GetService
    // and queries it against the loop's current session. While items remain it asks for another pass
    // with feedback telling the agent to finish them; otherwise it stops.
    var evaluator = new DelegateLoopEvaluator(async (context, cancellationToken) =>
    {
        TodoProvider todoProvider = context.Agent.GetService<TodoProvider>()
            ?? throw new InvalidOperationException("The agent did not expose a TodoProvider.");

        var remaining = await todoProvider.GetRemainingTodosAsync(context.Session).ConfigureAwait(false);
        if (remaining.Count > 0)
        {
            return LoopEvaluation.Continue($"Not all todos are complete yet ({remaining.Count} remaining). Please complete the remaining todo items.");
        }

        return LoopEvaluation.Stop();
    });

    // MaxIterations guarantees termination even if the agent stalls. Fresh-context mode is left off,
    // so the todo state recorded in one pass is still visible to the predicate through context.Session.
    AIAgent loopAgent = new LoopAgent(
        harnessAgent,
        evaluator,
        new LoopAgentOptions { MaxIterations = 6 });

    AgentResponse response = await StreamLoopAsync(
        loopAgent,
        "Plan and outline a 3-section blog post about Rayleigh scattering.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
// Pattern 3: an AI judge, backed by its own chat client, decides whether the original request was answered.
async Task JudgeLoopAsync()
{
    Console.WriteLine("\n=== 3. AI judge — loop until the request is answered (max 4) ===");

    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "answerer",
        instructions: "You are a helpful assistant. Answer the user's question thoroughly.");

    // AIJudgeLoopEvaluator asks the judge client for a structured JudgeVerdict saying whether the
    // original request has been fully addressed. While the verdict is "no", the loop continues and
    // the judge's gap analysis is fed in as the next iteration's input.
    var judge = new AIJudgeLoopEvaluator(CreateChatClient());

    // Every pass costs an additional judge call, so the iteration cap is kept small.
    var loopOptions = new LoopAgentOptions { MaxIterations = 4 };
    AIAgent loopAgent = new LoopAgent(harnessAgent, judge, loopOptions);

    AgentResponse response = await StreamLoopAsync(
        loopAgent,
        "Explain why the sky is blue, then also explain why sunsets are red.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
// Pattern 4: pair the loop with the ToolApprovalAgent auto-approval heuristics so approval-gated tool
// calls do not stall the loop.
async Task ApprovalLoopAsync()
{
    Console.WriteLine("\n=== 4. Approval heuristics + loop — auto-approve tool calls in the loop (max 2) ===");

    // Wrap the deployment function so that invoking it requires approval.
    var deployTool = new ApprovalRequiredAIFunction(
        AIFunctionFactory.Create(DeploymentTools.DeployService));

    // The auto-approval rule below approves every function call it is asked about (logging its name),
    // so the HarnessAgent's built-in ToolApprovalAgent resolves the DeployService approval internally
    // and no pending approval is surfaced to the LoopAgent.
    var approvalOptions = new ToolApprovalAgentOptions
    {
        AutoApprovalRules =
        [
            functionCall =>
            {
                Console.WriteLine($"  Auto-approving: {functionCall.Name}");
                return ValueTask.FromResult(true);
            },
        ],
    };

    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "operator",
        instructions: "You are a deployment operator. Use the DeployService tool to fulfil requests.",
        tools: [deployTool],
        toolApprovalAgentOptions: approvalOptions);

    // Keep looping (at most twice) until the latest response mentions "deployed", ignoring case.
    var evaluator = new DelegateLoopEvaluator((context, _) =>
    {
        bool deploymentConfirmed =
            context.LastResponse.Text.Contains("deployed", StringComparison.OrdinalIgnoreCase);
        LoopEvaluation decision = deploymentConfirmed
            ? LoopEvaluation.Stop()
            : LoopEvaluation.Continue();
        return new ValueTask<LoopEvaluation>(decision);
    });

    // Fresh-context mode is off, so the LoopAgent reuses one session across iterations and the
    // approval response flows back in.
    AIAgent loopAgent = new LoopAgent(
        harnessAgent,
        evaluator,
        new LoopAgentOptions { MaxIterations = 2 });

    AgentResponse response = await StreamLoopAsync(loopAgent, "Deploy the billing service.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
// Runs the loop agent in streaming mode and mirrors the stream to the console as it arrives.
//  - A change in ResponseId is treated as the start of another inner run and prints "--- run N ---",
//    making each re-invocation by the LoopAgent visible.
//  - A change of role prints a "User:" / "Agent:" prefix (or the raw role value for other roles), so
//    the loop's on-behalf-of feedback is visually distinct from the agent's own output.
// Returns the response aggregated from every streamed update.
static async Task<AgentResponse> StreamLoopAsync(AIAgent loopAgent, string input, AgentSession? session = null)
{
    var collected = new List<AgentResponseUpdate>();
    string? activeResponseId = null;
    ChatRole? activeRole = null;
    int runNumber = 0;

    await foreach (AgentResponseUpdate update in loopAgent.RunStreamingAsync(input, session))
    {
        // Updates without a ResponseId never start a new run.
        string? incomingId = update.ResponseId;
        if (incomingId is not null && incomingId != activeResponseId)
        {
            activeResponseId = incomingId;

            // Forget the last speaker so the first message of the new run gets a prefix again.
            activeRole = null;
            runNumber++;
            Console.WriteLine($"\n--- run {runNumber} ---");
        }

        // Emit a speaker label only when the role actually changes.
        if (update.Role is ChatRole speaker && speaker != activeRole)
        {
            activeRole = speaker;

            string label;
            if (speaker == ChatRole.User)
            {
                label = "User";
            }
            else if (speaker == ChatRole.Assistant)
            {
                label = "Agent";
            }
            else
            {
                label = speaker.Value;
            }

            Console.Write($"\n{label}: ");
        }

        Console.Write(update.Text);
        collected.Add(update);
    }

    Console.WriteLine();
    return collected.ToAgentResponse();
}
// Builds a HarnessAgent trimmed down for the loop demos: the agent-mode provider, file memory, file
// access, and web search are always disabled, and the todo provider is disabled unless the caller
// opts in with disableTodoProvider: false. Tools and tool-approval options are passed through as given.
AIAgent CreateLeanHarnessAgent(
    string name,
    string instructions,
    bool disableTodoProvider = true,
    IList<AITool>? tools = null,
    ToolApprovalAgentOptions? toolApprovalAgentOptions = null)
{
    IChatClient chatClient = CreateChatClient();

    // Per-request chat settings: the demo-specific instructions and tools, plus the shared output cap.
    var chatOptions = new ChatOptions
    {
        Instructions = instructions,
        Tools = tools,
        MaxOutputTokens = MaxOutputTokens,
    };

    var harnessOptions = new HarnessAgentOptions
    {
        Name = name,
        MaxContextWindowTokens = MaxContextWindowTokens,
        MaxOutputTokens = MaxOutputTokens,
        DisableAgentModeProvider = true,
        DisableTodoProvider = disableTodoProvider,
        DisableFileMemory = true,
        DisableFileAccess = true,
        DisableWebSearch = true,
        ToolApprovalAgentOptions = toolApprovalAgentOptions,
        ChatOptions = chatOptions,
    };

    return chatClient.AsHarnessAgent(harnessOptions);
}
/// <summary>Tool used by the approval-handling demo.</summary>
internal static class DeploymentTools
{
    // Simulated deployment: performs no real work and simply reports success for the given service.
    // The Description attributes provide the text attached to the function and its parameter.
    [Description("Deploy a service to production (requires approval).")]
    public static string DeployService([Description("The name of the service to deploy.")] string service)
    {
        return string.Concat("Deployed ", service, " to production.");
    }
}
@@ -0,0 +1,59 @@
# What this sample demonstrates
This sample demonstrates how to wrap a `HarnessAgent` with the **`LoopAgent`** decorator to re-invoke the agent until a configured **`LoopEvaluator`** decides to stop. A single decorator covers the common looping patterns — you just plug in a different evaluator (and optionally switch on fresh-context mode).
The `HarnessAgent` pre-configures function invocation, per-service-call chat history persistence, and in-loop compaction, so each demo only supplies the chat client, token limits, and instructions, then wraps the result with a `LoopAgent`.
## Looping patterns showcased
The program runs four demos sequentially, each driven by a different evaluator:
| # | Pattern | Evaluator | Notes |
| --- | --- | --- | --- |
| 1 | Completion-marker ("Ralph"-style) loop | `CompletionMarkerLoopEvaluator` | Re-invokes until the agent emits `<promise>COMPLETE</promise>`. Uses `FreshContextPerIteration = true` to restart each pass from the original task plus the aggregated feedback log on a new session, and includes the `{last_response}` placeholder in the feedback template so the agent sees its previous suggestion even though each pass starts fresh. |
| 2 | Delegate predicate (todos remaining) | `DelegateLoopEvaluator` | Loops while the built-in `TodoProvider` still has open items. The provider is fetched from the agent via `GetService<TodoProvider>()` and queried against the loop's current session. |
| 3 | AI judge | `AIJudgeLoopEvaluator` | A second `IChatClient` judges whether the original request was fully answered and continues while the answer is "no", injecting its gap analysis as the next input. |
| 4 | Approval heuristics + loop | `DelegateLoopEvaluator` + `ToolApprovalAgent` | Combines the `ToolApprovalAgent` auto-approval heuristics (`AutoApprovalRules`) with the loop, so a looped agent auto-approves tool calls instead of stalling on a pending approval. |
`MaxIterations` caps every loop, so each demo always terminates even if its evaluator never returns a stop decision.
### Evaluator mapping (Python → .NET)
The Python sample in [microsoft/agent-framework#6174](https://github.com/microsoft/agent-framework/pull/6174) exposes several distinct loop classes. In .NET these collapse into one `LoopAgent` that consumes evaluators:
| Python | .NET |
| --- | --- |
| Ralph loop (completion marker) | `LoopAgent` + `CompletionMarkerLoopEvaluator` |
| Ralph loop (fresh context each pass) | `LoopAgent` + `CompletionMarkerLoopEvaluator` + `FreshContextPerIteration = true` |
| Callable / predicate loop | `LoopAgent` + `DelegateLoopEvaluator` |
| AI judge loop | `LoopAgent` + `AIJudgeLoopEvaluator` |
## Prerequisites
Before running this sample, ensure you have:
1. An Azure AI Foundry project with a deployed model (e.g., `gpt-5.4`)
2. Azure CLI installed and authenticated (`az login`)
## Environment Variables
Set the following environment variables:
```bash
# Required: Your Azure AI Foundry project endpoint
export AZURE_AI_PROJECT_ENDPOINT="https://your-project.services.ai.azure.com/api/projects/your-project"
# Optional: Model deployment name (defaults to gpt-5.4)
export AZURE_AI_MODEL_DEPLOYMENT_NAME="gpt-5.4"
```
## Running the Sample
```bash
cd dotnet
dotnet run --project samples/02-agents/Harness/Harness_Step05_Loop
```
## What to Expect
The program runs the four demos in order. Each loop is executed with `RunStreamingAsync`, so output is printed live and every re-invocation of the inner agent is marked with a `--- run N ---` header (detected via a change in the streamed `ResponseId`) — this lets you see exactly when the `LoopAgent` loops. Each streamed message is prefixed with `User:` or `Agent:` based on its role, so the loop's on-behalf-of feedback messages (surfaced as `User` turns) are visually distinct from the agent's responses (`Agent`). Each demo finishes by printing its aggregated final response. Demo 4 also prints an `Auto-approving: ...` line each time the `ToolApprovalAgent` heuristic approves the `DeployService` tool call, showing how approval-aware agents integrate with the loop.
@@ -9,3 +9,4 @@ Samples demonstrating the [Harness AIContextProviders](../../../src/Microsoft.Ag
| [Harness_Step01_Research](./Harness_Step01_Research/README.md) | Using a ChatClientAgent with TodoProvider and AgentModeProvider for research, showcasing planning mode and todo management |
| [Harness_Step02_Research_WithBackgroundAgents](./Harness_Step02_Research_WithBackgroundAgents/README.md) | Using BackgroundAgentsProvider to delegate stock price lookups to a web-search background agent concurrently |
| [Harness_Step03_DataProcessing](./Harness_Step03_DataProcessing/README.md) | Using FileAccessProvider to give an agent access to CSV data files for reading, analysis, and output generation |
| [Harness_Step05_Loop](./Harness_Step05_Loop/README.md) | Wrapping a HarnessAgent with the LoopAgent decorator to re-invoke it until a configured LoopEvaluator (completion marker, predicate, AI judge, or approval-aware loop) decides to stop |
@@ -0,0 +1,201 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// A <see cref="LoopEvaluator"/> that uses a separate judge chat client to decide whether the user's original request
/// has been fully addressed, continuing the loop (with the judge's gap analysis as feedback) while the answer is "no".
/// </summary>
/// <remarks>
/// <para>
/// After each iteration the judge is queried directly (without any agent tools, session, or middleware) with the
/// original request and the agent's latest response, and asked for a structured <see cref="JudgeVerdict"/>. If the
/// judge client does not honor structured output, the verdict falls back to parsing the raw text for the
/// non-overlapping <see cref="DoneVerdictMarker"/> / <see cref="MoreVerdictMarker"/> markers (with
/// <see cref="MoreVerdictMarker"/> winning, so the loop keeps running, when the verdict is ambiguous or absent).
/// </para>
/// <para>
/// When the request is not yet answered, the evaluator returns feedback built from
/// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/> with the judge's gap analysis substituted for
/// <see cref="GapAnalysisPlaceholder"/>. How that feedback is delivered to the agent (and whether the session is
/// reset) is decided by the <see cref="LoopAgent"/> that consumes this evaluator.
/// </para>
/// <para>
/// The judge instructions act as a template: any occurrence of <see cref="CriteriaPlaceholder"/> is replaced with the
/// rendered <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when no criteria are supplied), letting
/// callers add bespoke standards the response must satisfy.
/// </para>
/// <para>
/// LLM-judged loops are costly and probabilistic, so consider setting a stricter
/// <see cref="LoopAgentOptions.MaxIterations"/> on the owning <see cref="LoopAgent"/>.
/// </para>
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class AIJudgeLoopEvaluator : LoopEvaluator
{
    /// <summary>The default system instructions used to prompt the judge.</summary>
    /// <remarks>
    /// Acts as a template: the trailing <see cref="CriteriaPlaceholder"/> is replaced with the rendered
    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when none are supplied).
    /// </remarks>
    public const string DefaultInstructions =
        "You are an evaluator. You are given a user's original request and an agent's latest response. " +
        "Decide whether the agent has fully addressed the original request. " +
        "Set 'answered' to true if the request has been fully addressed, or false if more work is still required. " +
        "When 'answered' is false, use 'gapAnalysis' to explain what is still missing or what work remains. " +
        "If you cannot return structured output, reply with " + DoneVerdictMarker + " when the request has been fully " +
        "addressed, or " + MoreVerdictMarker + " when more work is still required." +
        CriteriaPlaceholder;

    /// <summary>
    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when the
    /// original request has been fully addressed.
    /// </summary>
    /// <remarks>
    /// <see cref="DoneVerdictMarker"/> and <see cref="MoreVerdictMarker"/> are deliberately non-overlapping (neither is
    /// a substring of the other), so the text fallback cannot misclassify one verdict as the other. When the marker is
    /// ambiguous or absent, <see cref="MoreVerdictMarker"/> wins so the loop keeps running rather than stopping on an
    /// incomplete answer.
    /// </remarks>
    public const string DoneVerdictMarker = "VERDICT: DONE";

    /// <summary>
    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when more work
    /// is still required. Takes precedence over <see cref="DoneVerdictMarker"/> when both (or neither) are present.
    /// </summary>
    public const string MoreVerdictMarker = "VERDICT: MORE";

    /// <summary>
    /// The placeholder token within <see cref="DefaultInstructions"/> (or a custom
    /// <see cref="AIJudgeLoopEvaluatorOptions.Instructions"/>) that is replaced with the rendered
    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/>. When no criteria are supplied, the placeholder is removed.
    /// </summary>
    public const string CriteriaPlaceholder = "{criteria}";

    /// <summary>
    /// The placeholder token within <see cref="DefaultFeedbackMessageTemplate"/> (or a custom
    /// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that is replaced with the judge's gap analysis.
    /// </summary>
    public const string GapAnalysisPlaceholder = "{gap_analysis}";

    /// <summary>The default template used to build the feedback produced when the request is not yet answered.</summary>
    public const string DefaultFeedbackMessageTemplate =
        "Your previous response did not fully address the original request. " +
        "The following is still missing or incomplete: " + GapAnalysisPlaceholder + " " +
        "Please continue and fully address the original request.";

    /// <summary>The value substituted for the gap analysis when the judge did not provide one.</summary>
    private const string UnknownGapAnalysis = "<unknown>";

    private readonly IChatClient _judgeClient;
    private readonly string _instructions;
    private readonly string _feedbackMessageTemplate;

    /// <summary>
    /// Initializes a new instance of the <see cref="AIJudgeLoopEvaluator"/> class.
    /// </summary>
    /// <param name="judgeClient">The chat client used to judge whether the original request was answered.</param>
    /// <param name="options">Optional configuration for the judge. When <see langword="null"/>, defaults are used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="judgeClient"/> is <see langword="null"/>.</exception>
    public AIJudgeLoopEvaluator(IChatClient judgeClient, AIJudgeLoopEvaluatorOptions? options = null)
    {
        this._judgeClient = Throw.IfNull(judgeClient);

        // The criteria are fixed for the lifetime of the evaluator, so render them into the instructions once.
        this._instructions = (options?.Instructions ?? DefaultInstructions)
            .Replace(CriteriaPlaceholder, RenderCriteria(options?.Criteria));
        this._feedbackMessageTemplate = options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate;
    }

    /// <summary>
    /// Asks the judge whether the original request has been fully addressed by the agent's latest response.
    /// </summary>
    /// <param name="context">The loop context supplying the initial messages and the agent's latest response.</param>
    /// <param name="cancellationToken">A token used to cancel the judge request.</param>
    /// <returns>
    /// A stop evaluation when the judge considers the request answered; otherwise a continue evaluation whose
    /// feedback is the configured template with <see cref="GapAnalysisPlaceholder"/> replaced by the judge's gap
    /// analysis (or a fixed "unknown" value when none was provided).
    /// </returns>
    public override async ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(context);

        // Build the judge's user message from AIContent so non-text request content (images, data, etc.) is
        // preserved rather than flattened to text. The original request's contents are framed between header
        // text segments, followed by the agent's latest response text.
        var userContents = new List<AIContent>
        {
            new TextContent("# Has the original request been fully addressed?\n\n## Original request:\n"),
        };

        foreach (ChatMessage message in context.InitialMessages)
        {
            userContents.AddRange(message.Contents);
        }

        userContents.Add(new TextContent($"\n\n## Agent's latest response:\n{context.LastResponse.Text}"));

        List<ChatMessage> judgeMessages =
        [
            new ChatMessage(ChatRole.System, this._instructions),
            new ChatMessage(ChatRole.User, userContents),
        ];

        bool answered;
        string gapAnalysis = UnknownGapAnalysis;

        ChatResponse<JudgeVerdict> response = await this._judgeClient
            .GetResponseAsync<JudgeVerdict>(judgeMessages, LoopJsonContext.Default.Options, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        if (response.TryGetResult(out JudgeVerdict? verdict) && verdict is not null)
        {
            answered = verdict.Answered;
            if (!string.IsNullOrWhiteSpace(verdict.GapAnalysis))
            {
                gapAnalysis = verdict.GapAnalysis;
            }
        }
        else
        {
            // Fallback for clients that do not honor structured output: look for the explicit, non-overlapping verdict
            // markers, ignoring case. MoreVerdictMarker wins so an ambiguous or marker-less reply keeps looping rather
            // than stopping on an incomplete answer.
            string text = response.Text;
            answered = !ContainsMarker(text, MoreVerdictMarker) && ContainsMarker(text, DoneVerdictMarker);
        }

        // The request is answered: stop looping.
        if (answered)
        {
            return LoopEvaluation.Stop();
        }

        // Not yet answered: continue, providing feedback describing what is still missing.
        string feedback = this._feedbackMessageTemplate.Replace(GapAnalysisPlaceholder, gapAnalysis);
        return LoopEvaluation.Continue(feedback);
    }

    /// <summary>
    /// Determines whether <paramref name="text"/> contains <paramref name="marker"/>, ignoring case.
    /// </summary>
    /// <remarks>
    /// Uses an explicit ordinal, case-insensitive search so the comparison rules are stated at the call site and no
    /// upper-cased copy of the judge's reply has to be allocated.
    /// </remarks>
    private static bool ContainsMarker(string text, string marker) =>
        text.IndexOf(marker, StringComparison.OrdinalIgnoreCase) >= 0;

    /// <summary>
    /// Renders the supplied <paramref name="criteria"/> into a bullet block appended at <see cref="CriteriaPlaceholder"/>,
    /// or an empty string when no non-blank criteria are supplied.
    /// </summary>
    private static string RenderCriteria(IEnumerable<string>? criteria)
    {
        if (criteria is null)
        {
            return string.Empty;
        }

        var builder = new StringBuilder();
        foreach (string criterion in criteria)
        {
            // Null, empty, and whitespace-only entries are skipped rather than rendered as empty bullets.
            if (!string.IsNullOrWhiteSpace(criterion))
            {
                builder.Append("\n- ").Append(criterion);
            }
        }

        return builder.Length == 0
            ? string.Empty
            : "\n\nThe response must satisfy all of the following criteria:" + builder;
    }
}
@@ -0,0 +1,48 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;
namespace Microsoft.Agents.AI;
/// <summary>
/// Provides configuration options for <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class AIJudgeLoopEvaluatorOptions
{
    /// <summary>
    /// Gets or sets the system prompt given to the judge. Leave as <see langword="null"/> to fall back to
    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>.
    /// </summary>
    /// <remarks>
    /// The value is treated as a template: each <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> it contains
    /// is replaced with the rendered <see cref="Criteria"/>, or removed when there are none. Custom instructions
    /// without the placeholder will not include the criteria.
    /// </remarks>
    public string? Instructions { get; set; }

    /// <summary>
    /// Gets or sets optional extra criteria that the judge should require of the agent's response, in addition
    /// to addressing the original request.
    /// </summary>
    /// <remarks>
    /// Supplied criteria are rendered into the judge instructions at every
    /// <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> (the placeholder is also present in
    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>). A <see langword="null"/> or empty value removes the
    /// placeholder without adding anything.
    /// </remarks>
    public IEnumerable<string>? Criteria { get; set; }

    /// <summary>
    /// Gets or sets the template for the feedback produced when the judge decides the original request was not
    /// fully addressed. Leave as <see langword="null"/> to fall back to
    /// <see cref="AIJudgeLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
    /// </summary>
    /// <remarks>
    /// Each <see cref="AIJudgeLoopEvaluator.GapAnalysisPlaceholder"/> in the template is replaced with the judge's
    /// gap analysis, or with a placeholder value when the judge supplied none.
    /// </remarks>
    public string? FeedbackMessageTemplate { get; set; }
}
@@ -0,0 +1,78 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// A <see cref="LoopEvaluator"/> that ends the loop as soon as the agent's latest response contains a configured
/// marker string. Until then it requests another iteration and supplies feedback telling the agent to keep working
/// and to emit the marker once it is done.
/// </summary>
/// <remarks>
/// The feedback text is produced from a template (see
/// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>). Within that template,
/// <see cref="CompletionMarkerPlaceholder"/> is replaced by the configured marker and
/// <see cref="LastResponsePlaceholder"/> by the text of the agent's latest response. This evaluator only produces the
/// feedback; how it is delivered to the agent (and whether the session is reset) is decided by the
/// <see cref="LoopAgent"/> that consumes the evaluator.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class CompletionMarkerLoopEvaluator : LoopEvaluator
{
    /// <summary>
    /// The token inside <see cref="DefaultFeedbackMessageTemplate"/> (or inside a custom
    /// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that gets replaced with the
    /// configured completion marker.
    /// </summary>
    public const string CompletionMarkerPlaceholder = "{completion_marker}";

    /// <summary>
    /// The token inside a custom <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/> that gets
    /// replaced with the text of the agent's latest response. The substitution happens on every evaluation, which lets
    /// the feedback echo the agent's previous output back to it. That is useful when the consuming
    /// <see cref="LoopAgent"/> uses <see cref="LoopAgentOptions.FreshContextPerIteration"/>, because the agent would
    /// otherwise have no record of what it produced before.
    /// </summary>
    public const string LastResponsePlaceholder = "{last_response}";

    /// <summary>The template used for the feedback when no custom template is configured.</summary>
    public const string DefaultFeedbackMessageTemplate =
        "Continue working on the request. When you have fully completed the task, end your response with the marker '" +
        CompletionMarkerPlaceholder + "' to indicate completion.";

    private readonly string _completionMarker;
    private readonly string _feedbackMessageTemplate;

    /// <summary>
    /// Initializes a new instance of the <see cref="CompletionMarkerLoopEvaluator"/> class.
    /// </summary>
    /// <param name="completionMarker">The marker whose presence in the agent's latest response text stops the loop.</param>
    /// <param name="options">Optional feedback configuration. When <see langword="null"/>, defaults are used.</param>
    /// <exception cref="System.ArgumentException"><paramref name="completionMarker"/> is <see langword="null"/>, empty, or whitespace.</exception>
    public CompletionMarkerLoopEvaluator(string completionMarker, CompletionMarkerLoopEvaluatorOptions? options = null)
    {
        string marker = Throw.IfNullOrWhitespace(completionMarker);
        string template = options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate;

        this._completionMarker = marker;

        // The marker never changes after construction, so bake it into the template now. The {last_response}
        // placeholder is left in place because its value is only known per evaluation (see EvaluateAsync).
        this._feedbackMessageTemplate = template.Replace(CompletionMarkerPlaceholder, marker);
    }

    /// <inheritdoc />
    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(context);

        // Read the response text once; it is needed both for the marker check and for the feedback substitution.
        string responseText = context.LastResponse.Text;

        LoopEvaluation evaluation = responseText.Contains(this._completionMarker)
            ? LoopEvaluation.Stop()
            : LoopEvaluation.Continue(this._feedbackMessageTemplate.Replace(LastResponsePlaceholder, responseText));

        // The decision is computed synchronously, so wrap it in an already-completed ValueTask.
        return new ValueTask<LoopEvaluation>(evaluation);
    }
}
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;
namespace Microsoft.Agents.AI;
/// <summary>
/// Provides configuration options for <see cref="CompletionMarkerLoopEvaluator"/>.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class CompletionMarkerLoopEvaluatorOptions
{
/// <summary>
/// Gets or sets the template used to build the feedback produced when the completion marker has not yet appeared,
/// or <see langword="null"/> to use <see cref="CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
/// </summary>
/// <remarks>
/// Any occurrence of <see cref="CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder"/> in the template is
/// replaced with the configured completion marker. Any occurrence of
/// <see cref="CompletionMarkerLoopEvaluator.LastResponsePlaceholder"/> is replaced, on each evaluation, with the
/// text of the agent's latest response. This is useful for echoing the agent's prior output back to it when the
/// <see cref="LoopAgent"/> that consumes the evaluator uses <see cref="LoopAgentOptions.FreshContextPerIteration"/>.
/// </remarks>
public string? FeedbackMessageTemplate { get; set; }
}
@@ -0,0 +1,40 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// A <see cref="LoopEvaluator"/> whose decision (re-invoke or stop, and with what feedback) is made entirely by a
/// caller-supplied callback.
/// </summary>
/// <remarks>
/// The callback is handed the full <see cref="LoopContext"/> and returns a <see cref="LoopEvaluation"/>, so it
/// controls both whether the loop continues and what feedback (if any) accompanies the next iteration. This makes it
/// the most flexible of the evaluators.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class DelegateLoopEvaluator : LoopEvaluator
{
    private readonly Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> _evaluate;

    /// <summary>
    /// Initializes a new instance of the <see cref="DelegateLoopEvaluator"/> class.
    /// </summary>
    /// <param name="evaluate">The callback that decides whether to re-invoke the agent and what feedback to provide.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluate"/> is <see langword="null"/>.</exception>
    public DelegateLoopEvaluator(Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> evaluate)
        => this._evaluate = Throw.IfNull(evaluate);

    /// <inheritdoc />
    /// <remarks>The context is null-checked before the callback is invoked; the callback's result is returned as-is.</remarks>
    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
        => this._evaluate(Throw.IfNull(context), cancellationToken);
}
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft. All rights reserved.
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;
namespace Microsoft.Agents.AI;
/// <summary>
/// Represents the structured verdict returned by the judge chat client used by <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
/// <remarks>
/// Each property carries a <see cref="DescriptionAttribute"/> whose text describes the field.
/// NOTE(review): the description on <see cref="GapAnalysis"/> refers to the other field as 'answered' (lower case);
/// presumably that matches the serialized name the judge sees. Confirm against the serializer configuration.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
internal sealed class JudgeVerdict
{
/// <summary>
/// Gets or sets a value indicating whether the agent has fully addressed the user's original request.
/// </summary>
[Description("True if the agent has fully addressed the original request, otherwise false.")]
public bool Answered { get; set; }
/// <summary>
/// Gets or sets an explanation of what is still missing when the request has not been fully addressed.
/// Defaults to <see cref="string.Empty"/>.
/// </summary>
[Description("When 'answered' is false, explain what is still missing or what work remains to fully address the original request.")]
public string GapAnalysis { get; set; } = string.Empty;
}
@@ -0,0 +1,548 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// A <see cref="DelegatingAIAgent"/> that re-invokes the wrapped agent in a loop until the configured
/// <see cref="LoopEvaluator"/> set decides to stop.
/// </summary>
/// <remarks>
/// <para>
/// After each run of the wrapped agent, the configured evaluators are asked whether to re-invoke the agent and what
/// feedback to carry forward. This enables patterns such as iterative refinement, working through a task list, or
/// judging whether the original request was answered. Out-of-the-box evaluators include
/// <see cref="AIJudgeLoopEvaluator"/>, <see cref="CompletionMarkerLoopEvaluator"/>, and
/// <see cref="DelegateLoopEvaluator"/>.
/// </para>
/// <para>
/// When multiple evaluators are supplied they are evaluated in order after each iteration. The first evaluator that
/// asks to re-invoke wins: its feedback drives the next iteration and the remaining evaluators are not evaluated. The
/// loop stops only when every evaluator asks to stop. Consequently, evaluator order is priority order and
/// <see cref="LoopEvaluation.Stop"/> means "this evaluator does not request continuation" rather than a veto that
/// terminates the loop; place stop-only guards accordingly.
/// </para>
/// <para>
/// The caller's initial messages are sent to the wrapped agent exactly once. By default (when
/// <see cref="LoopAgentOptions.FreshContextPerIteration"/> is <see langword="false"/>) the loop reuses a single session
/// and sends only the winning evaluator's feedback as the next input, letting the agent continue from session history.
/// When <see cref="LoopAgentOptions.FreshContextPerIteration"/> is <see langword="true"/>, each re-invocation restarts
/// from the original input messages plus an aggregated feedback log, and the session is reset for each iteration: a
/// loop-owned session is created anew, while a caller-supplied session is restored from a snapshot taken at the start
/// of the run (so the wrapped agent must support session serialization). An evaluator may instead supply the exact next
/// messages via <see cref="LoopEvaluation.ContinueWithMessages"/>, bypassing this construction.
/// </para>
/// <para>
/// The loop is bounded by a global safety cap (<see cref="LoopAgentOptions.MaxIterations"/>) regardless of the
/// evaluators. If an iteration produces a pending tool-approval request, the loop stops and returns that response to
/// the caller rather than attempting to resolve the approval automatically.
/// </para>
/// <para>
/// A non-streaming run returns, by default, a single <see cref="AgentResponse"/> that aggregates the full transcript
/// in order: the on-behalf-of messages the loop injected for each re-invocation followed by that iteration's response
/// messages. The caller's original input messages are not echoed. Set
/// <see cref="LoopAgentOptions.NonStreamingReturnsLastResponseOnly"/> to instead return only the final iteration's
/// response. A streaming run always yields every iteration's updates, emitting the injected on-behalf-of messages as
/// updates before each re-invocation. The injected messages can be attributed with
/// <see cref="LoopAgentOptions.OnBehalfOfAuthorName"/>, or omitted from the surfaced output entirely with
/// <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/>.
/// </para>
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopAgent : DelegatingAIAgent
{
/// <summary>The default value used for <see cref="LoopAgentOptions.MaxIterations"/> when none is specified.</summary>
public const int DefaultMaxIterations = 10;
private readonly IReadOnlyList<LoopEvaluator> _evaluators;
private readonly int _maxIterations;
private readonly bool _freshContextPerIteration;
private readonly string? _onBehalfOfAuthorName;
private readonly bool _excludeOnBehalfOfMessages;
private readonly bool _nonStreamingReturnsLastResponseOnly;
private readonly System.Func<AgentSession, CancellationToken, ValueTask>? _sessionCreatedCallback;
private readonly ILogger _logger;
/// <summary>
/// Initializes a new instance of the <see cref="LoopAgent"/> class with a single evaluator.
/// </summary>
/// <remarks>
/// This overload wraps <paramref name="evaluator"/> in a one-element collection and forwards to the
/// multi-evaluator constructor, which performs the remaining validation and option handling.
/// </remarks>
/// <param name="innerAgent">The underlying agent to invoke in a loop.</param>
/// <param name="evaluator">The <see cref="LoopEvaluator"/> that decides whether to re-invoke the agent.</param>
/// <param name="options">Optional configuration for the loop. When <see langword="null"/>, defaults are used.</param>
/// <param name="loggerFactory">Optional factory used to create the loop's logger.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="innerAgent"/> or <paramref name="evaluator"/> is <see langword="null"/>.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><see cref="LoopAgentOptions.MaxIterations"/> is less than 1.</exception>
public LoopAgent(AIAgent innerAgent, LoopEvaluator evaluator, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null)
// The null check runs while the constructor-initializer arguments are built, so a null evaluator is reported
// under the name "evaluator" rather than as a null element of the forwarded collection.
: this(innerAgent, [Throw.IfNull(evaluator)], options, loggerFactory)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="LoopAgent"/> class with one or more evaluators.
/// </summary>
/// <param name="innerAgent">The underlying agent to invoke in a loop.</param>
/// <param name="evaluators">
/// The ordered <see cref="LoopEvaluator"/> instances that decide whether to re-invoke the agent. After each iteration
/// they are consulted in order, and the first one that asks to re-invoke wins.
/// </param>
/// <param name="options">Optional configuration for the loop. When <see langword="null"/>, defaults are used.</param>
/// <param name="loggerFactory">Optional factory used to create the loop's logger.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="innerAgent"/> or <paramref name="evaluators"/> is <see langword="null"/>, or <paramref name="evaluators"/> contains a <see langword="null"/> element.</exception>
/// <exception cref="System.ArgumentException"><paramref name="evaluators"/> is empty.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><see cref="LoopAgentOptions.MaxIterations"/> is less than 1.</exception>
public LoopAgent(AIAgent innerAgent, IEnumerable<LoopEvaluator> evaluators, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null)
    : base(innerAgent)
{
    // Copy the evaluators into an array so the set cannot change after construction.
    LoopEvaluator[] snapshot = Throw.IfNull(evaluators).ToArray();
    if (snapshot.Length == 0)
    {
        throw new System.ArgumentException("At least one evaluator must be supplied.", nameof(evaluators));
    }

    // Null elements are reported against the collection parameter.
    for (int i = 0; i < snapshot.Length; i++)
    {
        _ = Throw.IfNull(snapshot[i], nameof(evaluators));
    }

    this._evaluators = snapshot;

    // Resolve every option to a concrete value up front; a null options object means "all defaults".
    this._maxIterations = Throw.IfLessThan(options?.MaxIterations ?? DefaultMaxIterations, 1);
    this._freshContextPerIteration = options is { FreshContextPerIteration: true };
    this._onBehalfOfAuthorName = options?.OnBehalfOfAuthorName;
    this._excludeOnBehalfOfMessages = options is { ExcludeOnBehalfOfMessages: true };
    this._nonStreamingReturnsLastResponseOnly = options is { NonStreamingReturnsLastResponseOnly: true };
    this._sessionCreatedCallback = options?.SessionCreatedCallback;

    ILoggerFactory effectiveLoggerFactory = loggerFactory ?? NullLoggerFactory.Instance;
    this._logger = effectiveLoggerFactory.CreateLogger<LoopAgent>();
}
/// <inheritdoc />
// Non-streaming loop: runs the wrapped agent, then repeats with the next input chosen by the evaluators until
// they all stop, the iteration cap is hit, or a response contains a pending tool-approval request.
protected override async Task<AgentResponse> RunCoreAsync(
IEnumerable<ChatMessage> messages,
AgentSession? session = null,
AgentRunOptions? options = null,
CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(messages);
// Capture the caller's initial messages (sent once) and ensure the loop always runs against a session.
// A caller-supplied IReadOnlyList is used as-is (not copied); any other sequence is materialized once.
IReadOnlyList<ChatMessage> initialMessages = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();
bool sessionProvidedByCaller = session is not null;
if (session is null)
{
// No session supplied: create one on the wrapped agent and report it through the session-created callback.
session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false);
}
// When a fresh context is requested over a caller-supplied session, snapshot the pristine session up front so
// each re-invocation can restart from a fresh clone (see CreateFreshIterationSessionAsync). Taken before the
// first iteration mutates the session. Left null otherwise, which makes later iterations create a new session.
JsonElement? initialSessionSnapshot = this._freshContextPerIteration && sessionProvidedByCaller
? await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false)
: null;
LoopContext? context = null;
// One entry is appended per re-invoked iteration by EvaluateAndBuildNextAsync.
List<string?> feedbackLog = [];
IEnumerable<ChatMessage> currentMessages = initialMessages;
// Counts completed runs of the wrapped agent.
int iteration = 0;
// Aggregates the full transcript across iterations: each iteration's surfaced on-behalf-of input messages
// followed by that iteration's response messages. Unused when only the final response is returned.
List<ChatMessage> transcript = [];
// The loop-synthesized on-behalf-of messages that drive the current iteration (none for the first iteration).
IReadOnlyList<ChatMessage> currentSurfaced = [];
while (true)
{
// Run the wrapped agent using the context's session once it exists (it may have been replaced for a fresh
// context), otherwise the resolved session for the first run.
AgentSession activeSession = context?.Session ?? session;
AgentResponse response = await this.InnerAgent.RunAsync(currentMessages, activeSession, options, cancellationToken).ConfigureAwait(false);
iteration++;
// Record this iteration's on-behalf-of input (before the response it elicited) and the response itself.
transcript.AddRange(currentSurfaced);
transcript.AddRange(response.Messages);
// Create the context after the first run (so LastResponse is never null) and reuse it thereafter.
// Expose the feedback log as a read-only wrapper so evaluators cannot downcast and mutate it; the
// wrapper still reflects entries appended by the loop.
context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() };
context.Iteration = iteration;
context.LastResponse = response;
// Stop and surface the response when the agent is waiting for a tool approval.
// This check comes first, so it applies even on the last permitted iteration.
if (HasPendingApprovalRequests(response))
{
return this.BuildResult(response, transcript);
}
// Enforce the global safety cap regardless of what the evaluators want.
// Because this runs before evaluation, the evaluators are not consulted once the cap has been reached.
if (iteration >= this._maxIterations)
{
this.LogMaxIterationsReached(iteration);
return this.BuildResult(response, transcript);
}
// Ask the evaluators whether to continue; stop when none of them request a re-invocation.
LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, initialSessionSnapshot, cancellationToken).ConfigureAwait(false);
if (!step.ShouldContinue)
{
return this.BuildResult(response, transcript);
}
// Carry the next input forward: what is sent to the agent, and the subset recorded in the transcript.
currentMessages = step.Messages;
currentSurfaced = step.SurfacedMessages;
}
}
/// <inheritdoc />
// Streaming loop: forwards every inner update to the caller as it arrives, while also collecting each
// iteration's updates so the evaluators can judge the aggregated response. Ends on the same three conditions as the
// non-streaming path: all evaluators stop, the iteration cap is hit, or a tool approval is pending.
protected override async IAsyncEnumerable<AgentResponseUpdate> RunCoreStreamingAsync(
IEnumerable<ChatMessage> messages,
AgentSession? session = null,
AgentRunOptions? options = null,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(messages);
// Capture the caller's initial messages (sent once) and ensure the loop always runs against a session.
// A caller-supplied IReadOnlyList is used as-is (not copied); any other sequence is materialized once.
IReadOnlyList<ChatMessage> initialMessages = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();
bool sessionProvidedByCaller = session is not null;
if (session is null)
{
// No session supplied: create one on the wrapped agent and report it through the session-created callback.
session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false);
}
// When a fresh context is requested over a caller-supplied session, snapshot the pristine session up front so
// each re-invocation can restart from a fresh clone (see CreateFreshIterationSessionAsync). Taken before the
// first iteration mutates the session. Left null otherwise, which makes later iterations create a new session.
JsonElement? initialSessionSnapshot = this._freshContextPerIteration && sessionProvidedByCaller
? await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false)
: null;
LoopContext? context = null;
// One entry is appended per re-invoked iteration by EvaluateAndBuildNextAsync.
List<string?> feedbackLog = [];
IEnumerable<ChatMessage> currentMessages = initialMessages;
// Counts completed runs of the wrapped agent.
int iteration = 0;
// The loop-synthesized on-behalf-of messages that drive the current iteration (none for the first iteration).
IReadOnlyList<ChatMessage> currentSurfaced = [];
while (true)
{
// Stream this iteration's updates to the caller while collecting them so the iteration's full
// response can be aggregated for evaluation (true per-iteration streaming). Uses the context's
// session once it exists (it may have been replaced for a fresh context), otherwise the resolved session.
AgentSession activeSession = context?.Session ?? session;
List<AgentResponseUpdate> updates = [];
// The on-behalf-of messages that drive this iteration are surfaced before the response they elicit (none
// for the first iteration). They are flushed lazily on the first inner update so they can be stamped with
// that update's ResponseId, keeping them grouped with the iteration for downstream mergers.
bool surfacedPending = currentSurfaced.Count > 0;
await foreach (var update in this.InnerAgent.RunStreamingAsync(currentMessages, activeSession, options, cancellationToken).ConfigureAwait(false))
{
if (surfacedPending)
{
// Surfaced updates are yielded to the caller only; they are not added to 'updates', so they do not
// become part of the response that the evaluators see.
foreach (ChatMessage surfaced in currentSurfaced)
{
yield return CreateOnBehalfOfUpdate(surfaced, update.ResponseId);
}
surfacedPending = false;
}
updates.Add(update);
yield return update;
}
// The inner agent produced no updates this iteration; surface the on-behalf-of messages anyway. Since there
// is no iteration response to inherit from, generate a ResponseId so they still group together downstream.
if (surfacedPending)
{
string fallbackResponseId = System.Guid.NewGuid().ToString("N");
foreach (ChatMessage surfaced in currentSurfaced)
{
yield return CreateOnBehalfOfUpdate(surfaced, fallbackResponseId);
}
}
// Aggregate this iteration's updates and record the result on the context.
iteration++;
AgentResponse response = updates.ToAgentResponse();
// Create the context after the first run (so LastResponse is never null) and reuse it thereafter.
// Expose the feedback log as a read-only wrapper so evaluators cannot downcast and mutate it; the
// wrapper still reflects entries appended by the loop.
context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() };
context.Iteration = iteration;
context.LastResponse = response;
// Stop when the agent is waiting for a tool approval.
// The approval request itself has already been streamed to the caller as part of this iteration's updates.
if (HasPendingApprovalRequests(response))
{
yield break;
}
// Enforce the global safety cap regardless of what the evaluators want.
// Because this runs before evaluation, the evaluators are not consulted once the cap has been reached.
if (iteration >= this._maxIterations)
{
this.LogMaxIterationsReached(iteration);
yield break;
}
// Ask the evaluators whether to continue; stop when none of them request a re-invocation.
LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, initialSessionSnapshot, cancellationToken).ConfigureAwait(false);
if (!step.ShouldContinue)
{
yield break;
}
// Carry the next input forward: what is sent to the agent, and the subset surfaced as updates.
currentMessages = step.Messages;
currentSurfaced = step.SurfacedMessages;
}
}
/// <summary>
/// Consults the evaluators in order and, if one of them requests a re-invocation, prepares the next iteration:
/// resets the session when a fresh context is configured, records the feedback, and builds the next input.
/// </summary>
/// <param name="context">The loop context passed to every evaluator; its session is replaced when a fresh context is configured.</param>
/// <param name="feedbackLog">The mutable feedback log; exactly one entry is appended when the loop continues.</param>
/// <param name="initialSessionSnapshot">The start-of-run session snapshot, or <see langword="null"/> when none was taken.</param>
/// <param name="cancellationToken">Token forwarded to the evaluators and to session creation.</param>
/// <returns>A stop step when every evaluator declines to re-invoke; otherwise a continue step with the next input.</returns>
private async ValueTask<LoopNextStep> EvaluateAndBuildNextAsync(LoopContext context, List<string?> feedbackLog, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken)
{
    // Walk the evaluators in priority order and stop at the first one that wants another iteration; later
    // evaluators are not consulted.
    LoopEvaluation? selected = null;
    for (int i = 0; i < this._evaluators.Count && selected is null; i++)
    {
        LoopEvaluation candidate = await this._evaluators[i].EvaluateAsync(context, cancellationToken).ConfigureAwait(false);
        if (candidate.ShouldReinvoke)
        {
            selected = candidate;
        }
    }

    // Nobody asked to continue.
    if (selected is null)
    {
        return LoopNextStep.Stop();
    }

    // With a fresh context, swap in a new session before the next run so earlier conversation history does not
    // carry over. This happens whether the next input comes from feedback or from explicit evaluator messages:
    // a caller-supplied session is cloned from the start-of-run snapshot, a loop-owned one is created anew.
    if (this._freshContextPerIteration)
    {
        context.Session = await this.CreateFreshIterationSessionAsync(context, initialSessionSnapshot, cancellationToken).ConfigureAwait(false);
    }

    // Append exactly one log entry per re-invoked iteration, null included (for example when the evaluator supplied
    // explicit messages and no feedback string), so the last entry always belongs to the latest re-invocation.
    feedbackLog.Add(selected.Feedback);

    // Explicit messages from the evaluator are sent verbatim and surfaced as-is; the evaluator owns them,
    // including any author name. Feedback-based message construction is skipped in that case.
    if (selected.Messages is { } explicitMessages)
    {
        return LoopNextStep.Continue(explicitMessages, this.Surfaced(explicitMessages));
    }

    (List<ChatMessage> nextInput, List<ChatMessage> onBehalfOf) = this.BuildNextMessages(context, feedbackLog);
    return LoopNextStep.Continue(nextInput, this.Surfaced(onBehalfOf));
}
/// <summary>
/// Applies <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/> to a set of on-behalf-of messages: returns an
/// empty list when they are excluded, otherwise returns <paramref name="surfaced"/> unchanged.
/// </summary>
private IReadOnlyList<ChatMessage> Surfaced(IReadOnlyList<ChatMessage> surfaced)
{
    if (this._excludeOnBehalfOfMessages)
    {
        return [];
    }

    return surfaced;
}
/// <summary>
/// Wraps a surfaced on-behalf-of message in a streaming update. The update copies the message's role, contents and
/// author name, takes the supplied <paramref name="responseId"/> so downstream mergers group it with the iteration
/// it drives, and always has a non-empty <see cref="AgentResponseUpdate.MessageId"/> (a new GUID is generated when
/// the message has none). <see cref="AgentResponseUpdate.AgentId"/> is deliberately not set, because the message
/// was synthesized by the loop rather than produced by the wrapped agent.
/// </summary>
private static AgentResponseUpdate CreateOnBehalfOfUpdate(ChatMessage message, string? responseId)
{
    string? existingId = message.MessageId;

    AgentResponseUpdate update = new(message.Role, message.Contents)
    {
        AuthorName = message.AuthorName,
        MessageId = string.IsNullOrEmpty(existingId) ? System.Guid.NewGuid().ToString("N") : existingId,
        ResponseId = responseId,
    };

    return update;
}
/// <summary>
/// Builds the input for the next iteration from the feedback log.
/// </summary>
/// <param name="context">The loop context, used for the caller's initial messages when a fresh context is configured.</param>
/// <param name="feedback">The feedback log, whose last entry belongs to the iteration being prepared.</param>
/// <returns>
/// <c>Messages</c> is everything sent to the wrapped agent; <c>Surfaced</c> is the subset synthesized by the loop
/// (the feedback message, if any). Replayed caller input is never part of <c>Surfaced</c>.
/// </returns>
private (List<ChatMessage> Messages, List<ChatMessage> Surfaced) BuildNextMessages(LoopContext context, List<string?> feedback)
{
    List<ChatMessage> outgoing = [];
    List<ChatMessage> onBehalfOf = [];
    ChatMessage? synthesized;

    if (this._freshContextPerIteration)
    {
        // Fresh context: replay the original task, then add a single message aggregating all feedback so far.
        outgoing.AddRange(context.InitialMessages);
        synthesized = this.BuildAggregatedFeedbackMessage(feedback);
    }
    else
    {
        // Reused session: earlier turns are already in the session, so only the newest feedback is sent, verbatim.
        // With no usable feedback nothing is sent and the agent continues from its history.
        string? latest = feedback.Count == 0 ? null : feedback[feedback.Count - 1];
        synthesized = string.IsNullOrWhiteSpace(latest)
            ? null
            : new ChatMessage(ChatRole.User, latest) { AuthorName = this._onBehalfOfAuthorName, MessageId = System.Guid.NewGuid().ToString("N") };
    }

    // The synthesized feedback message is both sent to the agent and surfaced to the caller.
    if (synthesized is not null)
    {
        outgoing.Add(synthesized);
        onBehalfOf.Add(synthesized);
    }

    return (outgoing, onBehalfOf);
}
/// <summary>
/// Combines every non-blank feedback entry into one user message: a "## Feedback" heading followed by one bullet
/// per entry, in log order. Returns <see langword="null"/> when the log holds no non-blank entries.
/// </summary>
private ChatMessage? BuildAggregatedFeedbackMessage(IReadOnlyList<string?> feedback)
{
    // Created on the first usable entry, so a null builder afterwards means there was nothing to report.
    StringBuilder? body = null;
    foreach (string? entry in feedback)
    {
        if (string.IsNullOrWhiteSpace(entry))
        {
            continue;
        }

        body ??= new StringBuilder("## Feedback\n");
        body.Append("\n- ").Append(entry);
    }

    if (body is null)
    {
        return null;
    }

    return new ChatMessage(ChatRole.User, body.ToString()) { AuthorName = this._onBehalfOfAuthorName, MessageId = System.Guid.NewGuid().ToString("N") };
}
/// <summary>
/// Builds the value returned from a non-streaming run. When
/// <see cref="LoopAgentOptions.NonStreamingReturnsLastResponseOnly"/> is set, that is the final iteration's response
/// itself. Otherwise it is a new response whose messages are the whole transcript and whose metadata (ids,
/// timestamp, finish reason, usage, additional properties, continuation token) is copied from the final response.
/// </summary>
private AgentResponse BuildResult(AgentResponse lastResponse, List<ChatMessage> transcript)
    => this._nonStreamingReturnsLastResponseOnly
        ? lastResponse
        : new AgentResponse(transcript)
        {
            AgentId = lastResponse.AgentId,
            ResponseId = lastResponse.ResponseId,
            CreatedAt = lastResponse.CreatedAt,
            FinishReason = lastResponse.FinishReason,
            Usage = lastResponse.Usage,
            AdditionalProperties = lastResponse.AdditionalProperties,
            ContinuationToken = lastResponse.ContinuationToken,
        };
/// <summary>
/// Returns <see langword="true"/> when any message in <paramref name="response"/> carries at least one
/// <see cref="ToolApprovalRequestContent"/> item.
/// </summary>
private static bool HasPendingApprovalRequests(AgentResponse response)
    => response.Messages.Any(static message => message.Contents.OfType<ToolApprovalRequestContent>().Any());
/// <summary>
/// Writes an information-level log entry saying the loop stopped at the iteration cap. Does nothing when
/// information-level logging is disabled.
/// </summary>
private void LogMaxIterationsReached(int iteration)
{
    if (!this._logger.IsEnabled(LogLevel.Information))
    {
        return;
    }

    this._logger.LogInformation("LoopAgent reached the maximum of {MaxIterations} iterations and stopped.", iteration);
}
/// <summary>
/// Produces the session for the next iteration when a fresh context is configured. If a start-of-run snapshot
/// exists (a caller-supplied session), a new clone is deserialized from it; without a snapshot (a loop-owned
/// session) a brand-new session is created. In both cases the session-created callback is notified before the
/// session is returned.
/// </summary>
private async ValueTask<AgentSession> CreateFreshIterationSessionAsync(LoopContext context, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken)
{
    AgentSession fresh;
    if (initialSessionSnapshot.HasValue)
    {
        fresh = await this.InnerAgent.DeserializeSessionAsync(initialSessionSnapshot.Value, cancellationToken: cancellationToken).ConfigureAwait(false);
    }
    else
    {
        fresh = await context.Agent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
    }

    await this.NotifyNewSessionAsync(fresh, cancellationToken).ConfigureAwait(false);
    return fresh;
}
/// <summary>
/// Passes a session the loop has just created to the configured
/// <see cref="LoopAgentOptions.SessionCreatedCallback"/>, letting the caller observe the latest session. Does
/// nothing when no callback is configured.
/// </summary>
private async ValueTask NotifyNewSessionAsync(AgentSession session, CancellationToken cancellationToken)
{
    if (this._sessionCreatedCallback is not { } callback)
    {
        return;
    }

    await callback(session, cancellationToken).ConfigureAwait(false);
}
/// <summary>Represents the loop's decision for the next iteration: stop, or continue with a set of messages.</summary>
/// <remarks>Instances are created only through <see cref="Stop"/> and <see cref="Continue"/>; the constructor is private.</remarks>
private readonly struct LoopNextStep
{
private LoopNextStep(bool shouldContinue, IReadOnlyList<ChatMessage> messages, IReadOnlyList<ChatMessage> surfacedMessages)
{
this.ShouldContinue = shouldContinue;
this.Messages = messages;
this.SurfacedMessages = surfacedMessages;
}
/// <summary>Gets a value indicating whether the wrapped agent should be invoked again.</summary>
public bool ShouldContinue { get; }
/// <summary>Gets the full set of messages sent to the wrapped agent for the next iteration.</summary>
public IReadOnlyList<ChatMessage> Messages { get; }
/// <summary>
/// Gets the subset of <see cref="Messages"/> the loop synthesized on the caller's behalf (feedback or
/// evaluator-supplied messages) that should be surfaced to the caller. Replayed caller input is excluded.
/// </summary>
public IReadOnlyList<ChatMessage> SurfacedMessages { get; }
/// <summary>Creates a step that ends the loop; both message lists are empty.</summary>
public static LoopNextStep Stop() => new(shouldContinue: false, [], []);
/// <summary>Creates a step that re-invokes the wrapped agent with <paramref name="messages"/>, surfacing <paramref name="surfacedMessages"/> to the caller.</summary>
public static LoopNextStep Continue(IReadOnlyList<ChatMessage> messages, IReadOnlyList<ChatMessage> surfacedMessages)
=> new(shouldContinue: true, messages, surfacedMessages);
}
}
@@ -0,0 +1,117 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
namespace Microsoft.Agents.AI;
/// <summary>
/// Provides configuration options for <see cref="LoopAgent"/>.
/// </summary>
/// <remarks>
/// Every option is optional. Each property documents the default that applies when it is left unset.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopAgentOptions
{
/// <summary>
/// Gets or sets the global safety cap on the number of times the wrapped agent is invoked in a single loop run,
/// or <see langword="null"/> to use <see cref="LoopAgent.DefaultMaxIterations"/>.
/// </summary>
/// <remarks>
/// This is an absolute upper bound that applies regardless of the configured <see cref="LoopEvaluator"/> set. An
/// evaluator may stop the loop earlier, but no evaluator can cause the loop to exceed this cap. Raise this value
/// if you intend to allow longer loops.
/// </remarks>
public int? MaxIterations { get; set; }
/// <summary>
/// Gets or sets a value indicating whether each re-invocation restarts from a clean context: the original input
/// messages plus an aggregated feedback log, rather than the latest feedback appended to the prior conversation.
/// Defaults to <see langword="false"/>.
/// </summary>
/// <remarks>
/// <para>
/// This rebuilds the input <em>messages</em> each iteration and resets the session before each re-invocation so no
/// prior conversation history leaks across iterations. When the loop owns the session it creates a new one each
/// iteration. When the caller supplies a session, <see cref="LoopAgent"/> serializes it once at the start of the run
/// and restores a fresh clone (by deserializing that snapshot) before each re-invocation; this requires the wrapped
/// agent to support session serialization. The first iteration still runs against the caller's supplied session.
/// </para>
/// <para>
/// Note that cloning only results in a fresh context if the chat history storage mechanism supports cloning.
/// For example, the default in-memory storage supports cloning, since the messages are serialized as part of the
/// snapshot.
/// </para>
/// <para>
/// However, if the Conversations service is used, which stores messages in a single threaded list of messages,
/// then the cloned session will still contain the full message history, since the snapshot only captures an id reference
/// to the conversation and not the individual messages.
/// </para>
/// <para>
/// On the other hand, if responses are used with response ids, cloning will work well, since response ids are
/// forkable. Each new response has its own id, and is based on the id of the previous response.
/// </para>
/// <para>
/// On iterations where an evaluator returns explicit messages via
/// <see cref="LoopEvaluation.ContinueWithMessages"/>, the session is still reset (a fresh or cloned session is
/// used); only the rebuild of the input messages from the feedback log is skipped, because the evaluator's explicit
/// messages are sent verbatim.
/// </para>
/// </remarks>
public bool FreshContextPerIteration { get; set; }
/// <summary>
/// Gets or sets the author name stamped on the loop-synthesized "on-behalf-of" messages that the loop injects
/// into the wrapped agent for re-invocations, or <see langword="null"/> to leave them unattributed. Defaults to
/// <see langword="null"/>.
/// </summary>
/// <remarks>
/// When the loop re-invokes the wrapped agent it sends feedback messages on the caller's behalf. Setting this name
/// marks those autonomous messages (for example with a value such as <c>"loop"</c>) so that callers and the wrapped
/// agent can distinguish them from the caller's own turns. It is applied only to messages the loop synthesizes
/// itself; messages supplied explicitly by an evaluator via <see cref="LoopEvaluation.ContinueWithMessages"/> are
/// left untouched, and the caller's original input messages are never modified.
/// </remarks>
public string? OnBehalfOfAuthorName { get; set; }
/// <summary>
/// Gets or sets a value indicating whether the on-behalf-of messages the loop injects for re-invocations are
/// omitted from the output surfaced back to the caller. Defaults to <see langword="false"/>.
/// </summary>
/// <remarks>
/// When <see langword="false"/> (the default), a streaming run emits the injected feedback or evaluator-supplied
/// messages as updates before each re-invocation, and a non-streaming run includes them in the aggregated
/// transcript, so callers can see the loop acting autonomously on their behalf. Set this to <see langword="true"/>
/// to omit those messages from the returned output and surface only the wrapped agent's responses; the messages are
/// still sent to the wrapped agent. This setting has no effect when
/// <see cref="NonStreamingReturnsLastResponseOnly"/> causes a non-streaming run to return only the final response.
/// </remarks>
public bool ExcludeOnBehalfOfMessages { get; set; }
/// <summary>
/// Gets or sets a value indicating whether a non-streaming run returns only the final iteration's response instead
/// of the aggregated transcript of every iteration. Defaults to <see langword="false"/>.
/// </summary>
/// <remarks>
/// By default a non-streaming <see cref="LoopAgent"/> run returns a single <see cref="AgentResponse"/> that
/// aggregates, in order, the on-behalf-of messages the loop injected and the responses produced by every
/// iteration — mirroring the full sequence of updates yielded by a streaming run. Set this to <see langword="true"/>
/// to instead return only the last iteration's <see cref="AgentResponse"/>. This setting affects non-streaming runs
/// only; streaming runs always yield every iteration's updates.
/// </remarks>
public bool NonStreamingReturnsLastResponseOnly { get; set; }
/// <summary>
/// Gets or sets an optional callback invoked whenever <see cref="LoopAgent"/> creates a new session, so the caller
/// can capture the latest session (for example to continue the conversation after the loop completes). Defaults to
/// <see langword="null"/>.
/// </summary>
/// <remarks>
/// The callback is invoked with each session the loop itself creates: the initial loop-owned session (when the
/// caller does not supply one) and, when <see cref="FreshContextPerIteration"/> is enabled, every session created
/// for a re-invocation — whether a brand-new loop-owned session or a fresh clone deserialized from the caller's
/// original session. It is not invoked for a caller-supplied session, since the caller already holds that one. When
/// it fires multiple times, the most recent invocation carries the session the loop is currently using.
/// </remarks>
public Func<AgentSession, CancellationToken, ValueTask>? SessionCreatedCallback { get; set; }
}
@@ -0,0 +1,97 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Extensions.AI;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// Carries the state of a single <see cref="LoopAgent"/> run that a <see cref="LoopEvaluator"/> inspects when
/// deciding whether the wrapped agent should be invoked again and which feedback to hand over.
/// </summary>
/// <remarks>
/// One <see cref="LoopContext"/> is created per <see cref="LoopAgent"/> run and shared by all of its iterations;
/// <see cref="Iteration"/> and <see cref="LastResponse"/> are refreshed before every
/// <see cref="LoopEvaluator.EvaluateAsync"/> call. Evaluators are expected to be stateless and may serve several
/// runs concurrently, so per-run mutable state belongs on this context (for example in
/// <see cref="AdditionalProperties"/>) and not in evaluator fields.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopContext
{
    /// <summary>
    /// Creates a new <see cref="LoopContext"/> for a loop run.
    /// </summary>
    /// <param name="agent">The wrapped <see cref="AIAgent"/> being looped.</param>
    /// <param name="session">The <see cref="AgentSession"/> the loop runs against.</param>
    /// <param name="initialMessages">The messages supplied for the first iteration.</param>
    /// <param name="lastResponse">The <see cref="AgentResponse"/> from the iteration that just finished.</param>
    /// <param name="runOptions">The <see cref="AgentRunOptions"/> given to the loop run, if any.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="agent"/>, <paramref name="session"/>, <paramref name="initialMessages"/>, or
    /// <paramref name="lastResponse"/> is <see langword="null"/>.
    /// </exception>
    public LoopContext(
        AIAgent agent,
        AgentSession session,
        IReadOnlyList<ChatMessage> initialMessages,
        AgentResponse lastResponse,
        AgentRunOptions? runOptions = null)
    {
        // Validate every required argument, in declaration order, before touching any state.
        _ = Throw.IfNull(agent);
        _ = Throw.IfNull(session);
        _ = Throw.IfNull(initialMessages);
        _ = Throw.IfNull(lastResponse);

        this.Agent = agent;
        this.Session = session;
        this.InitialMessages = initialMessages;
        this.LastResponse = lastResponse;
        this.RunOptions = runOptions;
    }

    /// <summary>Gets the wrapped <see cref="AIAgent"/> being looped.</summary>
    public AIAgent Agent { get; }

    /// <summary>Gets the <see cref="AgentSession"/> the loop runs against.</summary>
    /// <remarks>
    /// If the caller passes no session, <see cref="LoopAgent"/> creates one up front. By default that one session
    /// serves every iteration, which keeps the conversation continuous and avoids replaying the original request.
    /// With <see cref="LoopAgentOptions.FreshContextPerIteration"/> enabled, <see cref="LoopAgent"/> resets the
    /// session before each re-invocation: a loop-owned session is created anew, whereas a caller-supplied session
    /// is restored by deserializing a fresh clone of the snapshot taken when the run started.
    /// </remarks>
    public AgentSession Session { get; internal set; }

    /// <summary>Gets the messages supplied for the first iteration of the loop.</summary>
    public IReadOnlyList<ChatMessage> InitialMessages { get; }

    /// <summary>Gets the <see cref="AgentRunOptions"/> given to the loop run, if any.</summary>
    public AgentRunOptions? RunOptions { get; }

    /// <summary>Gets how many agent runs have completed so far (1-based after the first run).</summary>
    public int Iteration { get; internal set; }

    /// <summary>Gets the <see cref="AgentResponse"/> from the iteration that just finished.</summary>
    public AgentResponse LastResponse { get; internal set; }

    /// <summary>
    /// Gets the feedback gathered over the iterations so far, in order, with one entry per re-invoked iteration.
    /// </summary>
    /// <remarks>
    /// An entry holds the feedback from the evaluator that asked for the matching re-invocation, or
    /// <see langword="null"/> if that iteration yielded no feedback string (for instance a plain
    /// <see cref="LoopEvaluation.Continue(string)"/> without text, or a
    /// <see cref="LoopEvaluation.ContinueWithMessages"/> that supplied explicit messages instead). Exactly one entry
    /// is recorded per re-invoked iteration regardless of mode, so the final entry always belongs to the most recent
    /// re-invoked iteration. <see cref="LoopAgent"/> owns and fills this log; evaluators may read it to reason over
    /// earlier feedback.
    /// </remarks>
    public IReadOnlyList<string?> Feedback { get; internal set; } = [];

    /// <summary>
    /// Gets a mutable bag of per-run state that is shared across iterations and visible to every evaluator.
    /// </summary>
    /// <remarks>
    /// The loop run, not any evaluator instance, owns this dictionary, which lets evaluators stay stateless.
    /// Evaluators can keep arbitrary per-run state here under a collision-resistant key.
    /// </remarks>
    public AdditionalPropertiesDictionary AdditionalProperties { get; } = new();
}
@@ -0,0 +1,86 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using Microsoft.Extensions.AI;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;
namespace Microsoft.Agents.AI;
/// <summary>
/// Describes the outcome a <see cref="LoopEvaluator"/> reports after an agent iteration: whether the
/// <see cref="LoopAgent"/> should invoke the wrapped agent again and, optionally, the feedback or explicit messages
/// that shape the next iteration.
/// </summary>
/// <remarks>
/// An evaluator only judges (continue or stop) and chooses what to carry forward. Usually it returns a feedback
/// string and leaves it to the <see cref="LoopAgent"/> to turn that feedback into the next input (and to decide
/// whether the session is reset). When full control is needed, <see cref="ContinueWithMessages"/> provides the exact
/// messages to send next and bypasses the loop's feedback and message construction.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopEvaluation
{
    // Shared instance handed out by Stop(); a stop result carries no feedback and no messages.
    private static readonly LoopEvaluation s_stop = new(shouldReinvoke: false, feedback: null, messages: null);

    private LoopEvaluation(bool shouldReinvoke, string? feedback, IReadOnlyList<ChatMessage>? messages)
    {
        this.Messages = messages;
        this.Feedback = feedback;
        this.ShouldReinvoke = shouldReinvoke;
    }

    /// <summary>Gets a value indicating whether the wrapped agent should be run again.</summary>
    public bool ShouldReinvoke { get; }

    /// <summary>
    /// Gets the feedback that describes what is missing or what the agent should do next, or
    /// <see langword="null"/> if no feedback was produced.
    /// </summary>
    /// <remarks>Only meaningful when <see cref="ShouldReinvoke"/> is <see langword="true"/>.</remarks>
    public string? Feedback { get; }

    /// <summary>
    /// Gets the explicit messages for the next iteration, or <see langword="null"/> if the loop should build the
    /// next input from feedback instead.
    /// </summary>
    /// <remarks>
    /// If non-<see langword="null"/>, the <see cref="LoopAgent"/> sends these messages verbatim and skips its own
    /// feedback and message construction. The session is still reset when
    /// <see cref="LoopAgentOptions.FreshContextPerIteration"/> is enabled. Only meaningful when
    /// <see cref="ShouldReinvoke"/> is <see langword="true"/>.
    /// </remarks>
    internal IReadOnlyList<ChatMessage>? Messages { get; }

    /// <summary>Creates an evaluation that ends the loop and returns the latest response to the caller.</summary>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="false"/>.</returns>
    public static LoopEvaluation Stop()
    {
        return s_stop;
    }

    /// <summary>Creates an evaluation that re-invokes the wrapped agent, optionally with feedback.</summary>
    /// <param name="feedback">
    /// Optional feedback for the next iteration. <see langword="null"/>, empty, or whitespace counts as no feedback.
    /// </param>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="true"/>.</returns>
    public static LoopEvaluation Continue(string? feedback = null)
    {
        // Blank feedback is normalized to null so consumers only ever see meaningful text.
        string? normalized = string.IsNullOrWhiteSpace(feedback) ? null : feedback;
        return new LoopEvaluation(shouldReinvoke: true, feedback: normalized, messages: null);
    }

    /// <summary>
    /// Creates an evaluation that re-invokes the wrapped agent with the given messages, bypassing the loop's
    /// feedback and message construction.
    /// </summary>
    /// <param name="messages">The messages to send to the wrapped agent on the next iteration.</param>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="true"/>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="messages"/> is <see langword="null"/>.</exception>
    /// <remarks>
    /// Choose this for full control over the next input (for example non-user roles, several messages, or non-text
    /// content). The messages are sent verbatim, and the loop neither accumulates nor injects feedback for this
    /// iteration.
    /// </remarks>
    public static LoopEvaluation ContinueWithMessages(IEnumerable<ChatMessage> messages)
    {
        _ = Throw.IfNull(messages);

        // Reuse the caller's collection when it is already a read-only list; otherwise materialize it once.
        IReadOnlyList<ChatMessage> payload = messages is IReadOnlyList<ChatMessage> existing
            ? existing
            : messages.ToList();

        return new LoopEvaluation(shouldReinvoke: true, feedback: null, messages: payload);
    }
}
@@ -0,0 +1,41 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
namespace Microsoft.Agents.AI;
/// <summary>
/// Provides the abstract base class for the component that decides, after each agent iteration, whether a
/// <see cref="LoopAgent"/> should re-invoke the wrapped agent and what feedback to provide.
/// </summary>
/// <remarks>
/// <para>
/// A <see cref="LoopEvaluator"/> is pure judgment: it inspects the <see cref="LoopContext"/> and returns a
/// <see cref="LoopEvaluation"/> describing whether to continue and any feedback for the next iteration. It does not
/// manage the session or construct the next input messages — that is the responsibility of the
/// <see cref="LoopAgent"/> that consumes it.
/// </para>
/// <para>
/// Out-of-the-box implementations include <see cref="AIJudgeLoopEvaluator"/>, <see cref="DelegateLoopEvaluator"/>,
/// and <see cref="CompletionMarkerLoopEvaluator"/>. Implementations should be stateless and safe to share across
/// concurrent loop runs; any per-run state must be stored on the supplied <see cref="LoopContext"/>.
/// </para>
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public abstract class LoopEvaluator
{
/// <summary>
/// Evaluates the loop state after an iteration and decides whether to re-invoke the wrapped agent and what
/// feedback to provide.
/// </summary>
/// <param name="context">The per-run <see cref="LoopContext"/> describing the current loop state.</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests.</param>
/// <returns>
/// A value task whose result is a <see cref="LoopEvaluation"/> indicating whether to continue and, if so, the
/// feedback to carry forward to the next iteration.
/// </returns>
/// <remarks>
/// Return <see cref="LoopEvaluation.Stop"/> to end the loop, <see cref="LoopEvaluation.Continue(string)"/> to
/// re-invoke with optional feedback, or <see cref="LoopEvaluation.ContinueWithMessages"/> to re-invoke with
/// explicit messages. The same <see cref="LoopContext"/> instance is reused across the iterations of a run.
/// </remarks>
public abstract ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default);
}
@@ -0,0 +1,16 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace Microsoft.Agents.AI;
/// <summary>
/// Source-generated <see cref="JsonSerializerContext"/> covering the loop types that need JSON serialization,
/// for example the structured <see cref="JudgeVerdict"/> consumed by <see cref="AIJudgeLoopEvaluator"/>.
/// Configured with <see cref="JsonSerializerDefaults.Web"/>.
/// </summary>
[JsonSourceGenerationOptions(JsonSerializerDefaults.Web)]
[JsonSerializable(typeof(JudgeVerdict))]
[ExcludeFromCodeCoverage]
internal sealed partial class LoopJsonContext : JsonSerializerContext
{
}
@@ -0,0 +1,314 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Moq;
using static Microsoft.Agents.AI.UnitTests.LoopTestHelpers;
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Contains the unit tests for <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
public class AIJudgeLoopEvaluatorTests
{
    /// <summary>
    /// A structured verdict reporting the request as answered must stop the loop.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_Answered_StopsAsync()
    {
        // Arrange
        var sut = new AIJudgeLoopEvaluator(CreateJudgeClient("{\"answered\":true}"));
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.False(result.ShouldReinvoke);
    }

    /// <summary>
    /// A structured verdict reporting the request as not answered must continue, with feedback that carries the
    /// judge's gap analysis and no leftover placeholder.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NotAnswered_ContinuesWithGapAnalysisAsync()
    {
        // Arrange
        var sut = new AIJudgeLoopEvaluator(
            CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"the cost estimate is missing\"}"));
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.NotNull(result.Feedback);
        Assert.Contains("the cost estimate is missing", result.Feedback!);
        Assert.DoesNotContain(AIJudgeLoopEvaluator.GapAnalysisPlaceholder, result.Feedback!);
    }

    /// <summary>
    /// When the judge replies with plain text, the DONE verdict marker must stop the loop.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_TextFallback_StopsWhenAnsweredAsync()
    {
        // Arrange
        var sut = new AIJudgeLoopEvaluator(CreateJudgeClient(AIJudgeLoopEvaluator.DoneVerdictMarker));
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.False(result.ShouldReinvoke);
    }

    /// <summary>
    /// Without structured output, the gap-analysis placeholder must be replaced by a fallback token.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NotAnswered_TextFallback_InjectsUnknownGapAnalysisAsync()
    {
        // Arrange
        var sut = new AIJudgeLoopEvaluator(CreateJudgeClient(AIJudgeLoopEvaluator.MoreVerdictMarker));
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.Contains("<unknown>", result.Feedback!);
    }

    /// <summary>
    /// Plain-text replies that only happen to contain the substring "ANSWERED" (such as "UNANSWERED" or
    /// "NOT ANSWERED") are not the explicit DONE verdict marker, so the loop must keep going.
    /// </summary>
    /// <param name="reply">The ambiguous text returned by the judge.</param>
    [Theory]
    [InlineData("UNANSWERED")]
    [InlineData("NOT ANSWERED")]
    [InlineData("The request is not yet answered.")]
    public async Task EvaluateAsync_TextFallback_AmbiguousReply_ContinuesAsync(string reply)
    {
        // Arrange
        var sut = new AIJudgeLoopEvaluator(CreateJudgeClient(reply));
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
    }

    /// <summary>
    /// Custom judge instructions supplied through the options must reach the judge client as a system message.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_CustomInstructions_AreSentToJudgeAsync()
    {
        // Arrange
        List<ChatMessage>? captured = null;
        Mock<IChatClient> judge = new();
        judge
            .Setup(client => client.GetResponseAsync(
                It.IsAny<IEnumerable<ChatMessage>>(),
                It.IsAny<ChatOptions>(),
                It.IsAny<CancellationToken>()))
            .Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((sent, _, _) => captured = sent.ToList())
            .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}")));
        var judgeOptions = new AIJudgeLoopEvaluatorOptions { Instructions = "CUSTOM JUDGE PROMPT" };
        var sut = new AIJudgeLoopEvaluator(judge.Object, judgeOptions);
        var loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.NotNull(captured);
        Assert.Contains(captured!, m => m.Role == ChatRole.System && m.Text == "CUSTOM JUDGE PROMPT");
    }

    /// <summary>
    /// A custom feedback message template supplied through the options must be used for the feedback.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_CustomFeedbackMessageTemplate_IsHonoredAsync()
    {
        // Arrange
        const string CustomTemplate = "Please address: " + AIJudgeLoopEvaluator.GapAnalysisPlaceholder;
        var judgeOptions = new AIJudgeLoopEvaluatorOptions { FeedbackMessageTemplate = CustomTemplate };
        var sut = new AIJudgeLoopEvaluator(
            CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"add unit tests\"}"),
            judgeOptions);
        var loopContext = CreateContext();

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.Equal("Please address: add unit tests", result.Feedback);
    }

    /// <summary>
    /// Non-text content in the original request (an image here) must be passed on to the judge instead of being
    /// silently lost when the request is flattened to text.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NonTextRequestContent_IsForwardedToJudgeAsync()
    {
        // Arrange
        List<ChatMessage>? captured = null;
        Mock<IChatClient> judge = new();
        judge
            .Setup(client => client.GetResponseAsync(
                It.IsAny<IEnumerable<ChatMessage>>(),
                It.IsAny<ChatOptions>(),
                It.IsAny<CancellationToken>()))
            .Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((sent, _, _) => captured = sent.ToList())
            .ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}")));
        var sut = new AIJudgeLoopEvaluator(judge.Object);
        var image = new DataContent(new byte[] { 1, 2, 3, 4 }, "image/png");
        var loopContext = new LoopContext(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, [image])],
            new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")]));

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.NotNull(captured);
        var userTurn = Assert.Single(captured!, m => m.Role == ChatRole.User);
        Assert.Contains(userTurn.Contents.OfType<DataContent>(), c => c.MediaType == "image/png");
    }

    /// <summary>
    /// Constructing the evaluator with a null judge client must throw.
    /// </summary>
    [Fact]
    public void AIJudgeLoopEvaluator_NullClient_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>("judgeClient", () => new AIJudgeLoopEvaluator(null!));
    }

    /// <summary>
    /// Calling EvaluateAsync with a null context must throw.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NullContext_ThrowsAsync()
    {
        // Arrange
        var judgeClient = CreateJudgeClient("{\"answered\":true}");
        var sut = new AIJudgeLoopEvaluator(judgeClient);

        // Act & Assert
        await Assert.ThrowsAsync<ArgumentNullException>("context", async () => await sut.EvaluateAsync(null!));
    }

    /// <summary>
    /// Supplied criteria must appear in the default judge instructions as a bullet list, with the placeholder
    /// consumed.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_Criteria_AreRenderedIntoDefaultInstructionsAsync()
    {
        // Arrange
        var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
        var sut = new AIJudgeLoopEvaluator(
            judgeClient,
            new AIJudgeLoopEvaluatorOptions { Criteria = ["Must cite sources", "Must be under 200 words"] });
        var loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        var systemMessage = judgeMessages.Single(static m => m.Role == ChatRole.System);
        string system = systemMessage.Text;
        Assert.Contains("The response must satisfy all of the following criteria:", system);
        Assert.Contains("- Must cite sources", system);
        Assert.Contains("- Must be under 200 words", system);
        Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
    }

    /// <summary>
    /// With no criteria supplied, the placeholder must be removed and the default instructions must contain no
    /// criteria block.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NoCriteria_LeavesDefaultInstructionsWithoutCriteriaBlockAsync()
    {
        // Arrange
        var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
        var sut = new AIJudgeLoopEvaluator(judgeClient);
        var loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        var systemMessage = judgeMessages.Single(static m => m.Role == ChatRole.System);
        string system = systemMessage.Text;
        Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
        Assert.DoesNotContain("The response must satisfy all of the following criteria:", system);
    }

    /// <summary>
    /// Criteria must be injected where the placeholder sits inside custom instructions.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_CustomInstructionsWithPlaceholder_InjectsCriteriaAsync()
    {
        // Arrange
        const string CustomInstructions = "Judge the answer." + AIJudgeLoopEvaluator.CriteriaPlaceholder + " Be strict.";
        var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
        var sut = new AIJudgeLoopEvaluator(
            judgeClient,
            new AIJudgeLoopEvaluatorOptions { Instructions = CustomInstructions, Criteria = ["Must include code"] });
        var loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        var systemMessage = judgeMessages.Single(static m => m.Role == ChatRole.System);
        string system = systemMessage.Text;
        Assert.StartsWith("Judge the answer.", system);
        Assert.EndsWith("Be strict.", system);
        Assert.Contains("- Must include code", system);
        Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
    }

    /// <summary>
    /// Custom instructions that lack the placeholder must be sent unchanged, without the criteria.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_CustomInstructionsWithoutPlaceholder_OmitsCriteriaAsync()
    {
        // Arrange
        const string CustomInstructions = "Judge the answer and be strict.";
        var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
        var sut = new AIJudgeLoopEvaluator(
            judgeClient,
            new AIJudgeLoopEvaluatorOptions { Instructions = CustomInstructions, Criteria = ["Must include code"] });
        var loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        var systemMessage = judgeMessages.Single(static m => m.Role == ChatRole.System);
        Assert.Equal(CustomInstructions, systemMessage.Text);
    }

    // Builds a context with a single user question and a single partial assistant answer.
    private static LoopContext CreateContext()
    {
        return new LoopContext(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, "original question")],
            new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")]));
    }
}
@@ -0,0 +1,145 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Moq;
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Unit tests for the <see cref="CompletionMarkerLoopEvaluator"/> class.
/// </summary>
public class CompletionMarkerLoopEvaluatorTests
{
/// <summary>
/// Verify that the constructor throws when the marker is null, empty, or whitespace.
/// </summary>
/// <param name="marker">The invalid marker value.</param>
[Theory]
[InlineData(null)]
[InlineData("")]
[InlineData(" ")]
public void CompletionMarkerLoopEvaluator_InvalidMarker_Throws(string? marker)
{
// Act & Assert
Assert.ThrowsAny<ArgumentException>(() => new CompletionMarkerLoopEvaluator(marker!));
}
/// <summary>
/// Verify that the evaluator stops the loop when the marker appears in the latest response.
/// </summary>
[Fact]
public async Task EvaluateAsync_MarkerPresent_StopsAsync()
{
// Arrange
var evaluator = new CompletionMarkerLoopEvaluator("DONE");
LoopContext context = CreateContext("all DONE here");
// Act
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
// Assert
Assert.False(evaluation.ShouldReinvoke);
}
/// <summary>
/// Verify that the evaluator continues with default feedback (containing the marker) when the marker is absent.
/// </summary>
[Fact]
public async Task EvaluateAsync_MarkerAbsent_ContinuesWithDefaultFeedbackAsync()
{
// Arrange
var evaluator = new CompletionMarkerLoopEvaluator("DONE");
LoopContext context = CreateContext("still working");
// Act
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
// Assert
Assert.True(evaluation.ShouldReinvoke);
Assert.NotNull(evaluation.Feedback);
Assert.Contains("DONE", evaluation.Feedback!);
Assert.DoesNotContain(CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder, evaluation.Feedback!);
}
/// <summary>
/// Verify that a custom feedback template is honored, with the completion marker substituted for the placeholder.
/// </summary>
[Fact]
public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_IsHonoredAsync()
{
// Arrange
const string Template = "Keep going and finish with " + CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when done.";
var evaluator = new CompletionMarkerLoopEvaluator("FINISHED", new CompletionMarkerLoopEvaluatorOptions { FeedbackMessageTemplate = Template });
LoopContext context = CreateContext("still working");
// Act
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
// Assert
Assert.True(evaluation.ShouldReinvoke);
Assert.Equal("Keep going and finish with FINISHED when done.", evaluation.Feedback);
}
/// <summary>
/// Verify that a custom feedback template which uses the last-response placeholder produces feedback that quotes
/// the agent's most recent response text and leaves no unresolved placeholder behind.
/// </summary>
[Fact]
public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_SubstitutesLastResponseAsync()
{
    // Arrange
    const string FeedbackTemplate =
        $"Your previous attempt was: '{CompletionMarkerLoopEvaluator.LastResponsePlaceholder}'. Improve it and finish with {CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder} when done.";
    var options = new CompletionMarkerLoopEvaluatorOptions { FeedbackMessageTemplate = FeedbackTemplate };
    var sut = new CompletionMarkerLoopEvaluator("FINISHED", options);
    LoopContext loopContext = CreateContext("candidate name: NoteNest");

    // Act
    LoopEvaluation result = await sut.EvaluateAsync(loopContext);

    // Assert
    Assert.True(result.ShouldReinvoke);
    Assert.Equal(
        "Your previous attempt was: 'candidate name: NoteNest'. Improve it and finish with FINISHED when done.",
        result.Feedback);
    Assert.DoesNotContain(CompletionMarkerLoopEvaluator.LastResponsePlaceholder, result.Feedback!);
}
/// <summary>
/// Verify that feedback built from the default template contains only the substituted completion marker and
/// does not echo the agent's latest response text (echoing the response requires a custom template).
/// </summary>
[Fact]
public async Task EvaluateAsync_MarkerAbsent_DefaultTemplate_DoesNotIncludeLastResponseAsync()
{
    // Arrange
    var sut = new CompletionMarkerLoopEvaluator("DONE");
    LoopContext loopContext = CreateContext("candidate name: NoteNest");
    string expectedFeedback = CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate.Replace(
        CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder,
        "DONE");

    // Act
    LoopEvaluation result = await sut.EvaluateAsync(loopContext);

    // Assert
    Assert.True(result.ShouldReinvoke);
    Assert.Equal(expectedFeedback, result.Feedback);
    Assert.DoesNotContain("NoteNest", result.Feedback!);
}
/// <summary>
/// Verify that passing a null context to EvaluateAsync raises an <see cref="ArgumentNullException"/> naming
/// the <c>context</c> parameter.
/// </summary>
[Fact]
public async Task EvaluateAsync_NullContext_ThrowsAsync()
{
    // Arrange
    var sut = new CompletionMarkerLoopEvaluator("DONE");
    Func<Task> evaluateWithNullContext = async () => await sut.EvaluateAsync(null!);

    // Act & Assert
    await Assert.ThrowsAsync<ArgumentNullException>("context", evaluateWithNullContext);
}
/// <summary>
/// Builds a <see cref="LoopContext"/> around a mocked agent and a new session, with a single user message ("go")
/// as the initial input and a single assistant message carrying <paramref name="responseText"/> as the last response.
/// </summary>
private static LoopContext CreateContext(string responseText)
{
    var userMessage = new ChatMessage(ChatRole.User, "go");
    var assistantMessage = new ChatMessage(ChatRole.Assistant, responseText);

    return new LoopContext(
        new Mock<AIAgent>().Object,
        new ChatClientAgentSession(),
        [userMessage],
        new AgentResponse([assistantMessage]));
}
}
@@ -0,0 +1,113 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Moq;
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Unit tests for the <see cref="DelegateLoopEvaluator"/> class.
/// </summary>
public class DelegateLoopEvaluatorTests
{
    /// <summary>
    /// Verify that constructing the evaluator with a null delegate raises an
    /// <see cref="ArgumentNullException"/> naming the <c>evaluate</c> parameter.
    /// </summary>
    [Fact]
    public void DelegateLoopEvaluator_NullDelegate_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>("evaluate", () => new DelegateLoopEvaluator(null!));
    }

    /// <summary>
    /// Verify that passing a null context to EvaluateAsync raises an <see cref="ArgumentNullException"/>
    /// naming the <c>context</c> parameter.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NullContext_ThrowsAsync()
    {
        // Arrange
        var sut = new DelegateLoopEvaluator((_, _) => Completed(LoopEvaluation.Stop()));
        Func<Task> evaluateWithNullContext = async () => await sut.EvaluateAsync(null!);

        // Act & Assert
        await Assert.ThrowsAsync<ArgumentNullException>("context", evaluateWithNullContext);
    }

    /// <summary>
    /// Verify that EvaluateAsync calls the wrapped delegate and hands back the very evaluation instance it produced.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_InvokesDelegate_AndReturnsItsEvaluationAsync()
    {
        // Arrange
        var wasCalled = false;
        var expectedEvaluation = LoopEvaluation.Continue("feedback");
        var sut = new DelegateLoopEvaluator((_, _) =>
        {
            wasCalled = true;
            return Completed(expectedEvaluation);
        });
        LoopContext loopContext = CreateContext();

        // Act
        LoopEvaluation actualEvaluation = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(wasCalled);
        Assert.Same(expectedEvaluation, actualEvaluation);
    }

    /// <summary>
    /// Verify that the delegate observes the same context instance that was given to EvaluateAsync.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_PassesContextToDelegateAsync()
    {
        // Arrange
        LoopContext? observedContext = null;
        var sut = new DelegateLoopEvaluator((ctx, _) =>
        {
            observedContext = ctx;
            return Completed(LoopEvaluation.Stop());
        });
        LoopContext loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.Same(loopContext, observedContext);
    }

    /// <summary>
    /// Verify that the cancellation token supplied to EvaluateAsync is the one the delegate receives.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_ForwardsCancellationTokenToDelegateAsync()
    {
        // Arrange
        using var tokenSource = new CancellationTokenSource();
        CancellationToken expectedToken = tokenSource.Token;
        var observedToken = CancellationToken.None;
        var sut = new DelegateLoopEvaluator((_, ct) =>
        {
            observedToken = ct;
            return Completed(LoopEvaluation.Stop());
        });
        LoopContext loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext, expectedToken);

        // Assert
        Assert.Equal(expectedToken, observedToken);
    }

    /// <summary>
    /// Wraps an evaluation in an already-completed <see cref="ValueTask{TResult}"/> for use as a delegate result.
    /// </summary>
    private static ValueTask<LoopEvaluation> Completed(LoopEvaluation evaluation)
    {
        return new ValueTask<LoopEvaluation>(evaluation);
    }

    /// <summary>
    /// Builds a <see cref="LoopContext"/> around a mocked agent and a new session, with one user message ("go")
    /// as input and one assistant message ("response") as the last response.
    /// </summary>
    private static LoopContext CreateContext()
    {
        var userMessage = new ChatMessage(ChatRole.User, "go");
        var assistantMessage = new ChatMessage(ChatRole.Assistant, "response");

        return new LoopContext(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [userMessage],
            new AgentResponse([assistantMessage]));
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,146 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Moq;
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Unit tests for the <see cref="LoopContext"/> class, including the public constructor that lets custom
/// evaluators be tested in isolation.
/// </summary>
public class LoopContextTests
{
    /// <summary>
    /// Verify that a null agent is rejected with an <see cref="ArgumentNullException"/> naming <c>agent</c>.
    /// </summary>
    [Fact]
    public void Constructor_NullAgent_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "agent",
            () => new LoopContext(null!, new ChatClientAgentSession(), [], CreateResponse()));
    }

    /// <summary>
    /// Verify that a null session is rejected with an <see cref="ArgumentNullException"/> naming <c>session</c>.
    /// </summary>
    [Fact]
    public void Constructor_NullSession_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "session",
            () => new LoopContext(CreateAgent(), null!, [], CreateResponse()));
    }

    /// <summary>
    /// Verify that null initial messages are rejected with an <see cref="ArgumentNullException"/> naming
    /// <c>initialMessages</c>.
    /// </summary>
    [Fact]
    public void Constructor_NullInitialMessages_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "initialMessages",
            () => new LoopContext(CreateAgent(), new ChatClientAgentSession(), null!, CreateResponse()));
    }

    /// <summary>
    /// Verify that a null last response is rejected with an <see cref="ArgumentNullException"/> naming
    /// <c>lastResponse</c>.
    /// </summary>
    [Fact]
    public void Constructor_NullLastResponse_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "lastResponse",
            () => new LoopContext(CreateAgent(), new ChatClientAgentSession(), [], null!));
    }

    /// <summary>
    /// Verify that valid constructor arguments are exposed unchanged through the matching properties and that the
    /// remaining properties start out with their initial values.
    /// </summary>
    [Fact]
    public void Constructor_ValidArguments_SetsProperties()
    {
        // Arrange
        AIAgent expectedAgent = CreateAgent();
        var expectedSession = new ChatClientAgentSession();
        ChatMessage[] expectedMessages = [new ChatMessage(ChatRole.User, "go")];
        AgentResponse expectedResponse = CreateResponse("done");

        // Act
        var sut = new LoopContext(expectedAgent, expectedSession, expectedMessages, expectedResponse);

        // Assert: constructor arguments are stored by reference.
        Assert.Same(expectedAgent, sut.Agent);
        Assert.Same(expectedSession, sut.Session);
        Assert.Same(expectedMessages, sut.InitialMessages);
        Assert.Same(expectedResponse, sut.LastResponse);

        // Assert: everything else is in its initial state.
        Assert.Null(sut.RunOptions);
        Assert.NotNull(sut.AdditionalProperties);
        Assert.Equal(0, sut.Iteration);
        Assert.Empty(sut.Feedback);
    }

    /// <summary>
    /// Verify that the session can be swapped via the internal setter (used by the loop for fresh contexts).
    /// </summary>
    [Fact]
    public void Session_IsInternallySettable()
    {
        // Arrange
        var sut = new LoopContext(CreateAgent(), new ChatClientAgentSession(), [], CreateResponse());
        var replacementSession = new ChatClientAgentSession();

        // Act
        sut.Session = replacementSession;

        // Assert
        Assert.Same(replacementSession, sut.Session);
    }

    /// <summary>
    /// Verify that <see cref="LoopContext.Feedback"/> accepts a new value through its internal setter,
    /// including null entries.
    /// </summary>
    [Fact]
    public void Feedback_IsInternallySettable()
    {
        // Arrange
        var sut = new LoopContext(CreateAgent(), new ChatClientAgentSession(), [], CreateResponse());

        // Act
        sut.Feedback = ["first", null];

        // Assert
        Assert.Equal(["first", null], sut.Feedback);
    }

    /// <summary>
    /// Verify that a context built through the public constructor can be handed to an evaluator, which is the
    /// scenario the public constructor exists to support.
    /// </summary>
    [Fact]
    public async Task PubliclyConstructedContext_CanEvaluateEvaluatorAsync()
    {
        // Arrange
        var loopContext = new LoopContext(
            CreateAgent(),
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, "go")],
            CreateResponse("done"));

        // The evaluator stops only when the last response text is exactly "done".
        var evaluator = new DelegateLoopEvaluator((ctx, _) =>
        {
            if (ctx.LastResponse.Text == "done")
            {
                return new ValueTask<LoopEvaluation>(LoopEvaluation.Stop());
            }

            return new ValueTask<LoopEvaluation>(LoopEvaluation.Continue());
        });

        // Act
        LoopEvaluation result = await evaluator.EvaluateAsync(loopContext);

        // Assert
        Assert.False(result.ShouldReinvoke);
    }

    /// <summary>
    /// Creates a mocked <see cref="AIAgent"/> instance with no setups.
    /// </summary>
    private static AIAgent CreateAgent()
    {
        return new Mock<AIAgent>().Object;
    }

    /// <summary>
    /// Creates an <see cref="AgentResponse"/> holding a single assistant message with the given text.
    /// </summary>
    private static AgentResponse CreateResponse(string text = "response")
    {
        return new AgentResponse([new ChatMessage(ChatRole.Assistant, text)]);
    }
}
@@ -0,0 +1,55 @@
// Copyright (c) Microsoft. All rights reserved.
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Unit tests for the <see cref="LoopEvaluation"/> class.
/// </summary>
public class LoopEvaluationTests
{
    /// <summary>
    /// Verify that the evaluation returned by Stop asks for no further invocation and has null feedback.
    /// </summary>
    [Fact]
    public void Stop_DoesNotReinvoke_AndHasNoFeedback()
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Stop();

        // Assert
        Assert.False(result.ShouldReinvoke);
        Assert.Null(result.Feedback);
    }

    /// <summary>
    /// Verify that calling Continue without an argument asks for re-invocation and has null feedback.
    /// </summary>
    [Fact]
    public void Continue_NoFeedback_ReinvokesWithNullFeedback()
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Continue();

        // Assert
        AssertReinvokesWithoutFeedback(result);
    }

    /// <summary>
    /// Verify that Continue normalizes empty or whitespace-only feedback to null, matching the documented
    /// "null, empty, or whitespace is treated as no feedback" semantics.
    /// </summary>
    [Theory]
    [InlineData("")]
    [InlineData(" ")]
    [InlineData("\t\n")]
    public void Continue_WhitespaceFeedback_NormalizesToNull(string feedback)
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Continue(feedback);

        // Assert
        AssertReinvokesWithoutFeedback(result);
    }

    /// <summary>
    /// Asserts that the evaluation requests re-invocation and carries null feedback.
    /// </summary>
    private static void AssertReinvokesWithoutFeedback(LoopEvaluation evaluation)
    {
        Assert.True(evaluation.ShouldReinvoke);
        Assert.Null(evaluation.Feedback);
    }
}
@@ -0,0 +1,141 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Moq;
using Moq.Protected;
namespace Microsoft.Agents.AI.UnitTests;
/// <summary>
/// Shared helpers used by the LoopAgent and LoopEvaluator unit tests.
/// </summary>
internal static class LoopTestHelpers
{
    /// <summary>
    /// Creates a <see cref="DelegateLoopEvaluator"/> that asks for another invocation (with no feedback) for as
    /// long as <paramref name="shouldReinvoke"/> returns <see langword="true"/>, and stops otherwise.
    /// </summary>
    public static DelegateLoopEvaluator While(Func<LoopContext, bool> shouldReinvoke)
    {
        return new DelegateLoopEvaluator((context, _) =>
        {
            LoopEvaluation evaluation = shouldReinvoke(context)
                ? LoopEvaluation.Continue()
                : LoopEvaluation.Stop();

            return new ValueTask<LoopEvaluation>(evaluation);
        });
    }

    /// <summary>
    /// Creates a mocked judge <see cref="IChatClient"/> whose <c>GetResponseAsync</c> always yields an assistant
    /// message containing <paramref name="responseText"/>, regardless of the arguments it is called with.
    /// </summary>
    public static IChatClient CreateJudgeClient(string responseText)
    {
        var cannedResponse = new ChatResponse(new ChatMessage(ChatRole.Assistant, responseText));
        var judge = new Mock<IChatClient>();

        var setup = judge.Setup(c => c.GetResponseAsync(
            It.IsAny<IEnumerable<ChatMessage>>(),
            It.IsAny<ChatOptions>(),
            It.IsAny<CancellationToken>()));
        setup.ReturnsAsync(cannedResponse);

        return judge.Object;
    }

    /// <summary>
    /// Creates a mocked judge <see cref="IChatClient"/> that always yields an assistant message containing
    /// <paramref name="responseText"/> and records the messages of its most recent invocation in
    /// <paramref name="capturedMessages"/> (the list is cleared and refilled on every call).
    /// </summary>
    public static IChatClient CreateCapturingJudgeClient(string responseText, out List<ChatMessage> capturedMessages)
    {
        // A local list is required because an out parameter cannot be captured by the callback lambda.
        var sink = new List<ChatMessage>();
        var cannedResponse = new ChatResponse(new ChatMessage(ChatRole.Assistant, responseText));
        var judge = new Mock<IChatClient>();

        var setup = judge.Setup(c => c.GetResponseAsync(
            It.IsAny<IEnumerable<ChatMessage>>(),
            It.IsAny<ChatOptions>(),
            It.IsAny<CancellationToken>()));
        setup
            .Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((sentMessages, _, _) =>
            {
                sink.Clear();
                sink.AddRange(sentMessages);
            })
            .ReturnsAsync(cannedResponse);

        capturedMessages = sink;
        return judge.Object;
    }

    /// <summary>
    /// Exposes <paramref name="items"/> as an asynchronous sequence. Cancellation is checked before each item is
    /// yielded, and the iterator yields control after each item.
    /// </summary>
    public static async IAsyncEnumerable<T> ToAsyncEnumerableAsync<T>(
        IEnumerable<T> items,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        using IEnumerator<T> enumerator = items.GetEnumerator();
        while (enumerator.MoveNext())
        {
            cancellationToken.ThrowIfCancellationRequested();
            yield return enumerator.Current;
            await Task.Yield();
        }
    }
}
/// <summary>
/// Wraps a mocked non-streaming inner agent: records the messages and session of every <c>RunCoreAsync</c> call
/// and answers each call with a response chosen by its 1-based call index.
/// </summary>
internal sealed class InnerAgentCapture
{
    /// <summary>Gets the underlying mock, for additional setups or verification.</summary>
    public Mock<AIAgent> Mock { get; } = new();

    /// <summary>Gets the mocked agent instance to hand to the code under test.</summary>
    public AIAgent Agent => this.Mock.Object;

    /// <summary>Gets the number of <c>RunCoreAsync</c> calls recorded so far.</summary>
    public int CallCount { get; private set; }

    /// <summary>Gets a snapshot of the messages passed on each call, in call order.</summary>
    public List<List<ChatMessage>> MessagesPerCall { get; } = [];

    /// <summary>Gets the session passed on each call, in call order.</summary>
    public List<AgentSession?> SessionsPerCall { get; } = [];

    /// <summary>
    /// Initializes the capture and configures the mock's protected <c>RunCoreAsync</c> method.
    /// </summary>
    /// <param name="responseFactory">
    /// Produces the response for a call; it receives the call count after the current call has been recorded,
    /// so the first call sees 1.
    /// </param>
    public InnerAgentCapture(Func<int, AgentResponse> responseFactory)
    {
        var runSetup = this.Mock
            .Protected()
            .Setup<Task<AgentResponse>>(
                "RunCoreAsync",
                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
                ItExpr.IsAny<AgentSession?>(),
                ItExpr.IsAny<AgentRunOptions?>(),
                ItExpr.IsAny<CancellationToken>());

        runSetup
            .Callback<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>(
                (sentMessages, sentSession, _, _) => this.Record(sentMessages, sentSession))
            .ReturnsAsync(() => responseFactory(this.CallCount));
    }

    /// <summary>
    /// Counts the call and stores a copy of its messages together with its session.
    /// </summary>
    private void Record(IEnumerable<ChatMessage> sentMessages, AgentSession? sentSession)
    {
        this.CallCount++;

        List<ChatMessage> snapshot = sentMessages.ToList();
        this.MessagesPerCall.Add(snapshot);
        this.SessionsPerCall.Add(sentSession);
    }
}
/// <summary>
/// Captures the messages and sessions sent to a mocked streaming inner agent and produces updates by call index.
/// </summary>
internal sealed class InnerStreamingCapture
{
    /// <summary>
    /// Initializes the capture and configures the mock's protected <c>RunCoreStreamingAsync</c> method.
    /// </summary>
    /// <param name="updatesFactory">
    /// Produces the updates to stream for a call; it receives the call count after the current call has been
    /// recorded, so the first call sees 1.
    /// </param>
    public InnerStreamingCapture(Func<int, AgentResponseUpdate[]> updatesFactory)
    {
        this.Mock
            .Protected()
            .Setup<IAsyncEnumerable<AgentResponseUpdate>>("RunCoreStreamingAsync",
                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
                ItExpr.IsAny<AgentSession?>(),
                ItExpr.IsAny<AgentRunOptions?>(),
                ItExpr.IsAny<CancellationToken>())
            .Returns<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>((msgs, session, _, ct) =>
            {
                // Recording happens when the mocked method is invoked, before the returned stream is enumerated.
                this.CallCount++;
                this.MessagesPerCall.Add(msgs.ToList());

                // Record the session as well, mirroring InnerAgentCapture, so streaming tests can assert which
                // session each iteration ran against.
                this.SessionsPerCall.Add(session);

                return LoopTestHelpers.ToAsyncEnumerableAsync(updatesFactory(this.CallCount), ct);
            });
    }

    /// <summary>Gets the underlying mock, for additional setups or verification.</summary>
    public Mock<AIAgent> Mock { get; } = new();

    /// <summary>Gets the mocked agent instance to hand to the code under test.</summary>
    public AIAgent Agent => this.Mock.Object;

    /// <summary>Gets the number of <c>RunCoreStreamingAsync</c> calls recorded so far.</summary>
    public int CallCount { get; private set; }

    /// <summary>Gets a snapshot of the messages passed on each call, in call order.</summary>
    public List<List<ChatMessage>> MessagesPerCall { get; } = [];

    /// <summary>Gets the session passed on each call, in call order.</summary>
    public List<AgentSession?> SessionsPerCall { get; } = [];
}