mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
.NET: Add LoopAgent capability for Harnesses (#6384)
* Add LoopAgent capability for Harnesses * Address PR comments. * Add support for returning user messages and response aggregation * Support fresh context per iteration with input sessions via cloning * Add ability to receive newly created sessions via callback * Address PR comments * Add judge criteria * Address PR comments
This commit is contained in:
committed by
GitHub
Unverified
parent
8e1998ddcb
commit
12ce099165
@@ -129,6 +129,7 @@
|
||||
<Project Path="samples/02-agents/Harness/Harness_Step02_Research_WithBackgroundAgents/Harness_Step02_Research_WithBackgroundAgents.csproj" />
|
||||
<Project Path="samples/02-agents/Harness/Harness_Step03_DataProcessing/Harness_Step03_DataProcessing.csproj" />
|
||||
<Project Path="samples/02-agents/Harness/Harness_Step04_CodeExecution/Harness_Step04_CodeExecution.csproj" />
|
||||
<Project Path="samples/02-agents/Harness/Harness_Step05_Loop/Harness_Step05_Loop.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/Samples/02-agents/AGUI/Step05_StateManagement/">
|
||||
<Project Path="samples/02-agents/AGUI/Step05_StateManagement/Client/Client.csproj" />
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFrameworks>net10.0</TargetFrameworks>
|
||||
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Azure.AI.Projects" />
|
||||
<PackageReference Include="Azure.Identity" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.Foundry\Microsoft.Agents.AI.Foundry.csproj" />
|
||||
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.Harness\Microsoft.Agents.AI.Harness.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,272 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
// This sample demonstrates how to wrap a HarnessAgent with the LoopAgent decorator to re-invoke
|
||||
// the agent until a configured LoopEvaluator decides to stop. It covers the common looping patterns
|
||||
// through one decorator, each driven by a different evaluator:
|
||||
//
|
||||
// 1. Completion-marker (Ralph-style) loop — keep refining until the agent emits a completion
|
||||
// marker, restarting each pass from a fresh context (CompletionMarkerLoopEvaluator +
|
||||
// FreshContextPerIteration).
|
||||
// 2. Delegate predicate (todos remaining) — loop while the built-in TodoProvider still has open
|
||||
// items (DelegateLoopEvaluator).
|
||||
// 3. AI judge — a second chat client decides whether the original request was answered, and the
|
||||
// loop continues while the answer is "no" (AIJudgeLoopEvaluator).
|
||||
// 4. Approval heuristics + loop — combine the LoopAgent with the ToolApprovalAgent auto-approval
|
||||
// heuristics so a looped agent auto-approves tool calls instead of stalling on approval.
|
||||
//
|
||||
// The demos run sequentially and print each loop's final response.
|
||||
|
||||
#pragma warning disable OPENAI001 // Suppress experimental API warnings for Responses API usage.
|
||||
#pragma warning disable MAAI001 // Suppress experimental API warnings for Agents AI experiments.
|
||||
|
||||
using System.ClientModel.Primitives;
|
||||
using System.ComponentModel;
|
||||
using Azure.AI.Projects;
|
||||
using Azure.Identity;
|
||||
using Microsoft.Agents.AI;
|
||||
using Microsoft.Extensions.AI;
|
||||
|
||||
// Read configuration from the environment. Blank or whitespace-only values are treated as unset, so a
// misconfigured endpoint fails with the explicit message below (rather than a UriFormatException from
// new Uri) and a blank deployment name falls back to the default model.
string? configuredEndpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT");
string endpoint = string.IsNullOrWhiteSpace(configuredEndpoint)
    ? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set.")
    : configuredEndpoint;

string? configuredDeploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME");
string deploymentName = string.IsNullOrWhiteSpace(configuredDeploymentName) ? "gpt-5.4" : configuredDeploymentName;

// The HarnessAgent pre-configures function invocation, per-service-call chat history persistence, and
// context-window compaction. These bounds size the in-loop compaction window.
const int MaxContextWindowTokens = 1_050_000;
const int MaxOutputTokens = 32_000;

// Build a single Foundry-backed IChatClient factory shared by every demo. Each call returns a fresh
// IChatClient over the same Responses endpoint.
var projectClient = new AIProjectClient(
    new Uri(endpoint),
    // WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
    // In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
    // latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
    new DefaultAzureCredential(),
    new AIProjectClientOptions { RetryPolicy = new ClientRetryPolicy(3) });

// Factory used by every demo (and by the judge in demo 3) to obtain a chat client for the configured deployment.
IChatClient CreateChatClient() =>
    projectClient.GetProjectOpenAIClient().GetResponsesClient().AsIChatClient(deploymentName);

// Run the four demos one after another.
await RalphLoopAsync();
await TodoLoopAsync();
await JudgeLoopAsync();
await ApprovalLoopAsync();
|
||||
|
||||
// Pattern 1: a "Ralph"-style loop that refines until the agent signals completion.
|
||||
async Task RalphLoopAsync()
{
    Console.WriteLine("\n=== 1. Completion-marker (Ralph-style) loop — refine until <promise>COMPLETE</promise> (max 5) ===");

    // Iterative refinement needs neither the todo nor the mode provider, so the lean defaults are used.
    const string RefinerInstructions =
        """
        You are iteratively refining a product name for a note-taking app. Each turn, build on the
        feedback so far: propose an improved candidate with a short reason. When you are confident the
        name is final, end your message with the exact marker <promise>COMPLETE</promise>.
        """;

    AIAgent harnessAgent = CreateLeanHarnessAgent(name: "ralph", instructions: RefinerInstructions);

    // With FreshContextPerIteration each pass starts on a brand-new session, so the agent does not
    // remember its previous candidate. The feedback template therefore echoes the last response back
    // via the {last_response} placeholder and reminds the agent of the completion marker.
    string feedbackTemplate =
        $"Your previous suggestion was:\n{CompletionMarkerLoopEvaluator.LastResponsePlaceholder}" +
        $"\n\nContinue to refine the name and remember to reply with {CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder} when happy.";

    // The evaluator keeps re-invoking the agent until the marker shows up in the response.
    var evaluator = new CompletionMarkerLoopEvaluator("<promise>COMPLETE</promise>", options: new()
    {
        FeedbackMessageTemplate = feedbackTemplate,
    });

    // MaxIterations bounds the loop even if the marker never appears.
    var loopOptions = new LoopAgentOptions { MaxIterations = 5, FreshContextPerIteration = true };

    AIAgent loopAgent = new LoopAgent(harnessAgent, evaluator, loopOptions);

    AgentResponse response = await StreamLoopAsync(loopAgent, "Suggest a name for a note-taking app.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
|
||||
|
||||
// Pattern 2: loop while the built-in TodoProvider still has open items.
|
||||
async Task TodoLoopAsync()
{
    Console.WriteLine("\n=== 2. Delegate predicate — loop while todos remain (max 6) ===");

    const string PlannerInstructions =
        """
        You are a planning assistant. First break the task into todo items using your todo tools.
        Then, on each turn, make progress and mark completed items as done. When all items are
        complete, summarize the result.
        """;

    // Only the mode provider stays disabled here: the built-in TodoProvider is kept so the agent has
    // todo tools to plan and track its work.
    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "planner",
        instructions: PlannerInstructions,
        disableTodoProvider: false);

    // The predicate looks up the TodoProvider on context.Agent via GetService and asks it for the open
    // items of the loop's current session. Open items => continue with feedback; none => stop.
    var evaluator = new DelegateLoopEvaluator(async (context, cancellationToken) =>
    {
        if (context.Agent.GetService<TodoProvider>() is not { } todoProvider)
        {
            throw new InvalidOperationException("The agent did not expose a TodoProvider.");
        }

        var remaining = await todoProvider.GetRemainingTodosAsync(context.Session).ConfigureAwait(false);
        bool hasOpenItems = remaining.Count > 0;

        return hasOpenItems
            ? LoopEvaluation.Continue($"Not all todos are complete yet ({remaining.Count} remaining). Please complete the remaining todo items.")
            : LoopEvaluation.Stop();
    });

    // MaxIterations guarantees termination even if the agent stalls with items still open.
    var loopOptions = new LoopAgentOptions { MaxIterations = 6 };

    AIAgent loopAgent = new LoopAgent(harnessAgent, evaluator, loopOptions);

    // In non-fresh mode the LoopAgent reuses one session across iterations, so the todo state persists
    // and the predicate can read it through context.Session.
    AgentResponse response = await StreamLoopAsync(
        loopAgent,
        "Plan and outline a 3-section blog post about Rayleigh scattering.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
|
||||
|
||||
// Pattern 3: a second chat client judges whether the original request was answered.
|
||||
async Task JudgeLoopAsync()
{
    Console.WriteLine("\n=== 3. AI judge — loop until the request is answered (max 4) ===");

    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "answerer",
        instructions: "You are a helpful assistant. Answer the user's question thoroughly.");

    // The judge gets its own IChatClient. AIJudgeLoopEvaluator asks it for a structured JudgeVerdict on
    // whether the original request was fully addressed; while the verdict is "no", the judge's gap
    // analysis becomes the feedback for the next iteration.
    IChatClient judgeClient = CreateChatClient();
    var judge = new AIJudgeLoopEvaluator(judgeClient);

    // Every pass costs an additional judge call, hence the small iteration cap.
    var loopOptions = new LoopAgentOptions { MaxIterations = 4 };

    AIAgent loopAgent = new LoopAgent(harnessAgent, judge, loopOptions);

    AgentResponse response = await StreamLoopAsync(
        loopAgent,
        "Explain why the sky is blue, then also explain why sunsets are red.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
|
||||
|
||||
// Pattern 4: combine the loop with the ToolApprovalAgent auto-approval heuristics.
|
||||
async Task ApprovalLoopAsync()
{
    Console.WriteLine("\n=== 4. Approval heuristics + loop — auto-approve tool calls in the loop (max 2) ===");

    // DeployService is wrapped so that invoking it requires approval.
    var deployTool = new ApprovalRequiredAIFunction(
        AIFunctionFactory.Create(DeploymentTools.DeployService));

    // A single auto-approval rule is registered with the HarnessAgent's ToolApprovalAgent options. The
    // rule logs the function name and approves every call it is asked about, so the DeployService
    // approval is resolved inside the inner agent and no pending approval reaches the LoopAgent.
    var approvalOptions = new ToolApprovalAgentOptions
    {
        AutoApprovalRules =
        [
            functionCall =>
            {
                Console.WriteLine($" Auto-approving: {functionCall.Name}");
                return ValueTask.FromResult(true);
            },
        ],
    };

    AIAgent harnessAgent = CreateLeanHarnessAgent(
        name: "operator",
        instructions: "You are a deployment operator. Use the DeployService tool to fulfil requests.",
        tools: [deployTool],
        toolApprovalAgentOptions: approvalOptions);

    // Short loop: stop as soon as the latest response mentions "deployed" (case-insensitive),
    // otherwise continue without additional feedback.
    var evaluator = new DelegateLoopEvaluator((context, _) =>
    {
        bool deployed = context.LastResponse.Text.Contains("deployed", StringComparison.OrdinalIgnoreCase);
        return new ValueTask<LoopEvaluation>(deployed ? LoopEvaluation.Stop() : LoopEvaluation.Continue());
    });

    var loopOptions = new LoopAgentOptions { MaxIterations = 2 };

    AIAgent loopAgent = new LoopAgent(harnessAgent, evaluator, loopOptions);

    // The LoopAgent reuses a single session across iterations, so the approval response flows back in.
    AgentResponse response = await StreamLoopAsync(loopAgent, "Deploy the billing service.");
    Console.WriteLine($"\nFinal response:\n{response.Text}");
}
|
||||
|
||||
// Streams a loop run to the console, printing updates live and marking each new inner run (detected
|
||||
// via a change in ResponseId) with an "--- run N ---" header so you can see when the LoopAgent
|
||||
// re-invokes the inner agent. Each message is prefixed with "User:" or "Agent:" based on its role, so
|
||||
// the loop's on-behalf-of feedback (User) is visually distinct from the agent's responses (Agent).
|
||||
// Returns the aggregated final response.
|
||||
static async Task<AgentResponse> StreamLoopAsync(AIAgent loopAgent, string input, AgentSession? session = null)
{
    // Every streamed update is collected so the aggregated response can be returned at the end.
    List<AgentResponseUpdate> collected = [];
    string? activeResponseId = null;
    ChatRole? activeRole = null;
    int runNumber = 0;

    await foreach (var update in loopAgent.RunStreamingAsync(input, session))
    {
        // A ResponseId that differs from the previous one marks the next inner run (loop iteration):
        // print a header and forget the current speaker so the role prefix is printed again.
        if (update.ResponseId is { } incomingId && incomingId != activeResponseId)
        {
            activeResponseId = incomingId;
            activeRole = null;
            runNumber++;
            Console.WriteLine($"\n--- run {runNumber} ---");
        }

        // Emit a "User:" / "Agent:" prefix (or the raw role value) whenever the speaker changes.
        if (update.Role is { } speaker && speaker != activeRole)
        {
            activeRole = speaker;

            string label;
            if (speaker == ChatRole.User)
            {
                label = "User";
            }
            else if (speaker == ChatRole.Assistant)
            {
                label = "Agent";
            }
            else
            {
                label = speaker.Value;
            }

            Console.Write($"\n{label}: ");
        }

        Console.Write(update.Text);
        collected.Add(update);
    }

    Console.WriteLine();
    return collected.ToAgentResponse();
}
|
||||
|
||||
// Creates a HarnessAgent with the agent-mode provider always disabled (and the todo provider disabled
|
||||
// by default), plus all other heavyweight providers turned off so each loop demo stays focused.
|
||||
AIAgent CreateLeanHarnessAgent(
    string name,
    string instructions,
    bool disableTodoProvider = true,
    IList<AITool>? tools = null,
    ToolApprovalAgentOptions? toolApprovalAgentOptions = null)
{
    // Each harness agent gets its own chat client from the shared factory.
    IChatClient chatClient = CreateChatClient();

    // Per-request chat settings: the demo's instructions, its optional tools, and the output cap.
    var chatOptions = new ChatOptions
    {
        Instructions = instructions,
        Tools = tools,
        MaxOutputTokens = MaxOutputTokens,
    };

    // The mode provider, file memory, file access and web search are always switched off; the todo
    // provider is switched off unless the caller opts back in via disableTodoProvider: false.
    var harnessOptions = new HarnessAgentOptions
    {
        Name = name,
        MaxContextWindowTokens = MaxContextWindowTokens,
        MaxOutputTokens = MaxOutputTokens,
        DisableAgentModeProvider = true,
        DisableTodoProvider = disableTodoProvider,
        DisableFileMemory = true,
        DisableFileAccess = true,
        DisableWebSearch = true,
        ToolApprovalAgentOptions = toolApprovalAgentOptions,
        ChatOptions = chatOptions,
    };

    return chatClient.AsHarnessAgent(harnessOptions);
}
|
||||
|
||||
/// <summary>Tool used by the approval-handling demo.</summary>
|
||||
internal static class DeploymentTools
{
    /// <summary>Returns a confirmation message stating that <paramref name="service"/> was deployed to production.</summary>
    /// <param name="service">The name of the service to deploy.</param>
    /// <returns>The text <c>Deployed {service} to production.</c></returns>
    [Description("Deploy a service to production (requires approval).")]
    public static string DeployService([Description("The name of the service to deploy.")] string service)
    {
        return string.Concat("Deployed ", service, " to production.");
    }
}
|
||||
@@ -0,0 +1,59 @@
|
||||
# What this sample demonstrates
|
||||
|
||||
This sample demonstrates how to wrap a `HarnessAgent` with the **`LoopAgent`** decorator to re-invoke the agent until a configured **`LoopEvaluator`** decides to stop. A single decorator covers the common looping patterns — you just plug in a different evaluator (and optionally switch on fresh-context mode).
|
||||
|
||||
The `HarnessAgent` pre-configures function invocation, per-service-call chat history persistence, and in-loop compaction, so each demo only supplies the chat client, token limits, and instructions, then wraps the result with a `LoopAgent`.
|
||||
|
||||
## Looping patterns showcased
|
||||
|
||||
The program runs four demos sequentially, each driven by a different evaluator:
|
||||
|
||||
| # | Pattern | Evaluator | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| 1 | Completion-marker ("Ralph"-style) loop | `CompletionMarkerLoopEvaluator` | Re-invokes until the agent emits `<promise>COMPLETE</promise>`. Uses `FreshContextPerIteration = true` to restart each pass from the original task plus the aggregated feedback log on a new session, and includes the `{last_response}` placeholder in the feedback template so the agent sees its previous suggestion even though each pass starts fresh. |
|
||||
| 2 | Delegate predicate (todos remaining) | `DelegateLoopEvaluator` | Loops while the built-in `TodoProvider` still has open items. The provider is fetched from the agent via `GetService<TodoProvider>()` and queried against the loop's current session. |
|
||||
| 3 | AI judge | `AIJudgeLoopEvaluator` | A second `IChatClient` judges whether the original request was fully answered and continues while the answer is "no", injecting its gap analysis as the next input. |
|
||||
| 4 | Approval heuristics + loop | `DelegateLoopEvaluator` + `ToolApprovalAgent` | Combines the `ToolApprovalAgent` auto-approval heuristics (`AutoApprovalRules`) with the loop, so a looped agent auto-approves tool calls instead of stalling on a pending approval. |
|
||||
|
||||
`MaxIterations` caps every loop so it always terminates even if the evaluator never stops.
|
||||
|
||||
### Evaluator mapping (Python → .NET)
|
||||
|
||||
The Python sample in [microsoft/agent-framework#6174](https://github.com/microsoft/agent-framework/pull/6174) exposes several distinct loop classes. In .NET these collapse into one `LoopAgent` that consumes evaluators:
|
||||
|
||||
| Python | .NET |
|
||||
| --- | --- |
|
||||
| Ralph loop (completion marker) | `LoopAgent` + `CompletionMarkerLoopEvaluator` |
|
||||
| Ralph loop (fresh context each pass) | `LoopAgent` + `CompletionMarkerLoopEvaluator` + `FreshContextPerIteration = true` |
|
||||
| Callable / predicate loop | `LoopAgent` + `DelegateLoopEvaluator` |
|
||||
| AI judge loop | `LoopAgent` + `AIJudgeLoopEvaluator` |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before running this sample, ensure you have:
|
||||
|
||||
1. An Azure AI Foundry project with a deployed model (e.g., `gpt-5.4`)
|
||||
2. Azure CLI installed and authenticated (`az login`)
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set the following environment variables:
|
||||
|
||||
```bash
|
||||
# Required: Your Azure AI Foundry project endpoint
|
||||
export AZURE_AI_PROJECT_ENDPOINT="https://your-project.services.ai.azure.com/api/projects/your-project"
|
||||
|
||||
# Optional: Model deployment name (defaults to gpt-5.4)
|
||||
export AZURE_AI_MODEL_DEPLOYMENT_NAME="gpt-5.4"
|
||||
```
|
||||
|
||||
## Running the Sample
|
||||
|
||||
```bash
|
||||
cd dotnet
|
||||
dotnet run --project samples/02-agents/Harness/Harness_Step05_Loop
|
||||
```
|
||||
|
||||
## What to Expect
|
||||
|
||||
The program runs the four demos in order. Each loop is executed with `RunStreamingAsync`, so output is printed live and every re-invocation of the inner agent is marked with a `--- run N ---` header (detected via a change in the streamed `ResponseId`) — this lets you see exactly when the `LoopAgent` loops. Each streamed message is prefixed with `User:` or `Agent:` based on its role, so the loop's on-behalf-of feedback messages (surfaced as `User` turns) are visually distinct from the agent's responses (`Agent`). Each demo finishes by printing its aggregated final response. Demo 4 also prints an `Auto-approving: ...` line each time the `ToolApprovalAgent` heuristic approves the `DeployService` tool call, showing how approval-aware agents integrate with the loop.
|
||||
@@ -9,3 +9,4 @@ Samples demonstrating the [Harness AIContextProviders](../../../src/Microsoft.Ag
|
||||
| [Harness_Step01_Research](./Harness_Step01_Research/README.md) | Using a ChatClientAgent with TodoProvider and AgentModeProvider for research, showcasing planning mode and todo management |
|
||||
| [Harness_Step02_Research_WithBackgroundAgents](./Harness_Step02_Research_WithBackgroundAgents/README.md) | Using BackgroundAgentsProvider to delegate stock price lookups to a web-search background agent concurrently |
|
||||
| [Harness_Step03_DataProcessing](./Harness_Step03_DataProcessing/README.md) | Using FileAccessProvider to give an agent access to CSV data files for reading, analysis, and output generation |
|
||||
| [Harness_Step05_Loop](./Harness_Step05_Loop/README.md) | Wrapping a HarnessAgent with the LoopAgent decorator to re-invoke it until a configured LoopEvaluator (completion marker, predicate, AI judge, or approval-aware loop) decides to stop |
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
|
||||
/// A <see cref="LoopEvaluator"/> that uses a separate judge chat client to decide whether the user's original request
|
||||
/// has been fully addressed, continuing the loop (with the judge's gap analysis as feedback) while the answer is "no".
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// After each iteration the judge is queried directly (without any agent tools, session, or middleware) with the
|
||||
/// original request and the agent's latest response, and asked for a structured <see cref="JudgeVerdict"/>. If the
|
||||
/// judge client does not honor structured output, the verdict falls back to parsing the raw text for the
|
||||
/// non-overlapping <see cref="DoneVerdictMarker"/> / <see cref="MoreVerdictMarker"/> markers (with
|
||||
/// <see cref="MoreVerdictMarker"/> winning, so the loop keeps running, when the verdict is ambiguous or absent).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// When the request is not yet answered, the evaluator returns feedback built from
|
||||
/// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/> with the judge's gap analysis substituted for
|
||||
/// <see cref="GapAnalysisPlaceholder"/>. How that feedback is delivered to the agent (and whether the session is
|
||||
/// reset) is decided by the <see cref="LoopAgent"/> that consumes this evaluator.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The judge instructions act as a template: any occurrence of <see cref="CriteriaPlaceholder"/> is replaced with the
|
||||
/// rendered <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when no criteria are supplied), letting
|
||||
/// callers add bespoke standards the response must satisfy.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// LLM-judged loops are costly and probabilistic, so consider setting a stricter
|
||||
/// <see cref="LoopAgentOptions.MaxIterations"/> on the owning <see cref="LoopAgent"/>.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class AIJudgeLoopEvaluator : LoopEvaluator
{
    /// <summary>The default system instructions used to prompt the judge.</summary>
    /// <remarks>
    /// Acts as a template: the trailing <see cref="CriteriaPlaceholder"/> is replaced with the rendered
    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/> (or removed when none are supplied).
    /// </remarks>
    public const string DefaultInstructions =
        "You are an evaluator. You are given a user's original request and an agent's latest response. " +
        "Decide whether the agent has fully addressed the original request. " +
        "Set 'answered' to true if the request has been fully addressed, or false if more work is still required. " +
        "When 'answered' is false, use 'gapAnalysis' to explain what is still missing or what work remains. " +
        "If you cannot return structured output, reply with " + DoneVerdictMarker + " when the request has been fully " +
        "addressed, or " + MoreVerdictMarker + " when more work is still required." +
        CriteriaPlaceholder;

    /// <summary>
    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when the
    /// original request has been fully addressed.
    /// </summary>
    /// <remarks>
    /// <see cref="DoneVerdictMarker"/> and <see cref="MoreVerdictMarker"/> are deliberately non-overlapping (neither is
    /// a substring of the other), so the text fallback cannot misclassify one verdict as the other. When the marker is
    /// ambiguous or absent, <see cref="MoreVerdictMarker"/> wins so the loop keeps running rather than stopping on an
    /// incomplete answer.
    /// </remarks>
    public const string DoneVerdictMarker = "VERDICT: DONE";

    /// <summary>
    /// The verdict marker the judge is asked to emit (for clients that do not honor structured output) when more work
    /// is still required. Takes precedence over <see cref="DoneVerdictMarker"/> when both (or neither) are present.
    /// </summary>
    public const string MoreVerdictMarker = "VERDICT: MORE";

    /// <summary>
    /// The placeholder token within <see cref="DefaultInstructions"/> (or a custom
    /// <see cref="AIJudgeLoopEvaluatorOptions.Instructions"/>) that is replaced with the rendered
    /// <see cref="AIJudgeLoopEvaluatorOptions.Criteria"/>. When no criteria are supplied, the placeholder is removed.
    /// </summary>
    public const string CriteriaPlaceholder = "{criteria}";

    /// <summary>
    /// The placeholder token within <see cref="DefaultFeedbackMessageTemplate"/> (or a custom
    /// <see cref="AIJudgeLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that is replaced with the judge's gap analysis.
    /// </summary>
    public const string GapAnalysisPlaceholder = "{gap_analysis}";

    /// <summary>The default template used to build the feedback produced when the request is not yet answered.</summary>
    public const string DefaultFeedbackMessageTemplate =
        "Your previous response did not fully address the original request. " +
        "The following is still missing or incomplete: " + GapAnalysisPlaceholder + " " +
        "Please continue and fully address the original request.";

    /// <summary>The value substituted for the gap analysis when the judge did not provide one.</summary>
    private const string UnknownGapAnalysis = "<unknown>";

    private readonly IChatClient _judgeClient;
    private readonly string _instructions;
    private readonly string _feedbackMessageTemplate;

    /// <summary>
    /// Initializes a new instance of the <see cref="AIJudgeLoopEvaluator"/> class.
    /// </summary>
    /// <param name="judgeClient">The chat client used to judge whether the original request was answered.</param>
    /// <param name="options">Optional configuration for the judge. When <see langword="null"/>, defaults are used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="judgeClient"/> is <see langword="null"/>.</exception>
    public AIJudgeLoopEvaluator(IChatClient judgeClient, AIJudgeLoopEvaluatorOptions? options = null)
    {
        this._judgeClient = Throw.IfNull(judgeClient);

        // The criteria are rendered once, up front: every occurrence of the placeholder in the (default or
        // custom) instructions is replaced with the bullet block, or with nothing when there are no criteria.
        this._instructions = (options?.Instructions ?? DefaultInstructions)
            .Replace(CriteriaPlaceholder, RenderCriteria(options?.Criteria));
        this._feedbackMessageTemplate = options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate;
    }

    /// <summary>
    /// Asks the judge client whether the original request has been fully addressed by the latest response and
    /// translates its verdict into a stop/continue decision.
    /// </summary>
    /// <param name="context">The loop context supplying the initial messages and the agent's latest response.</param>
    /// <param name="cancellationToken">A token used to cancel the judge call.</param>
    /// <returns>
    /// A stop evaluation when the judge reports the request as answered; otherwise a continue evaluation whose
    /// feedback is the feedback template with <see cref="GapAnalysisPlaceholder"/> replaced by the judge's gap
    /// analysis (or a fixed "unknown" value when none was provided).
    /// </returns>
    public override async ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(context);

        // Build the judge's user message from AIContent so non-text request content (images, data, etc.) is
        // preserved rather than flattened to text. The original request's contents are framed between header
        // text segments, followed by the agent's latest response text.
        var userContents = new List<AIContent>
        {
            new TextContent("# Has the original request been fully addressed?\n\n## Original request:\n"),
        };
        foreach (ChatMessage message in context.InitialMessages)
        {
            userContents.AddRange(message.Contents);
        }

        userContents.Add(new TextContent($"\n\n## Agent's latest response:\n{context.LastResponse.Text}"));

        List<ChatMessage> judgeMessages =
        [
            new ChatMessage(ChatRole.System, this._instructions),
            new ChatMessage(ChatRole.User, userContents),
        ];

        bool answered;
        string gapAnalysis = UnknownGapAnalysis;
        ChatResponse<JudgeVerdict> response = await this._judgeClient
            .GetResponseAsync<JudgeVerdict>(judgeMessages, LoopJsonContext.Default.Options, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        if (response.TryGetResult(out JudgeVerdict? verdict) && verdict is not null)
        {
            answered = verdict.Answered;
            if (!string.IsNullOrWhiteSpace(verdict.GapAnalysis))
            {
                gapAnalysis = verdict.GapAnalysis;
            }
        }
        else
        {
            // Fallback for clients that do not honor structured output: look for the explicit, non-overlapping verdict
            // markers. MoreVerdictMarker wins so an ambiguous or marker-less reply keeps looping rather than stopping
            // on an incomplete answer. The markers are matched case-insensitively.
            string text = response.Text;
            answered = !ContainsMarker(text, MoreVerdictMarker) && ContainsMarker(text, DoneVerdictMarker);
        }

        // The request is answered: stop looping.
        if (answered)
        {
            return LoopEvaluation.Stop();
        }

        // Not yet answered: continue, providing feedback describing what is still missing.
        string feedback = this._feedbackMessageTemplate.Replace(GapAnalysisPlaceholder, gapAnalysis);
        return LoopEvaluation.Continue(feedback);
    }

    /// <summary>
    /// Determines whether <paramref name="text"/> contains <paramref name="marker"/>, ignoring case.
    /// </summary>
    /// <remarks>
    /// Uses an ordinal, case-insensitive search instead of upper-casing the whole judge reply first, which avoids
    /// allocating a temporary copy of the text just to compare it.
    /// </remarks>
    private static bool ContainsMarker(string text, string marker) =>
        text.IndexOf(marker, StringComparison.OrdinalIgnoreCase) >= 0;

    /// <summary>
    /// Renders the supplied <paramref name="criteria"/> into a bullet block appended at <see cref="CriteriaPlaceholder"/>,
    /// or an empty string when no non-blank criteria are supplied.
    /// </summary>
    private static string RenderCriteria(IEnumerable<string>? criteria)
    {
        if (criteria is null)
        {
            return string.Empty;
        }

        // Blank entries are skipped so they do not produce empty bullets.
        var builder = new StringBuilder();
        foreach (string criterion in criteria)
        {
            if (!string.IsNullOrWhiteSpace(criterion))
            {
                builder.Append("\n- ").Append(criterion);
            }
        }

        return builder.Length == 0
            ? string.Empty
            : "\n\nThe response must satisfy all of the following criteria:" + builder;
    }
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Configuration options consumed by <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class AIJudgeLoopEvaluatorOptions
{
    /// <summary>
    /// Gets or sets the system instructions given to the judge. Leave <see langword="null"/> to fall back to
    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>.
    /// </summary>
    /// <remarks>
    /// Every occurrence of <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> is replaced with the rendered
    /// <see cref="Criteria"/>, or removed when there are no criteria. Instructions without the placeholder never
    /// receive the criteria.
    /// </remarks>
    public string? Instructions { get; set; }

    /// <summary>
    /// Gets or sets optional extra criteria that the agent's response must meet; the judge evaluates them together
    /// with the original request.
    /// </summary>
    /// <remarks>
    /// The criteria are rendered into the judge instructions at each
    /// <see cref="AIJudgeLoopEvaluator.CriteriaPlaceholder"/> (this includes
    /// <see cref="AIJudgeLoopEvaluator.DefaultInstructions"/>). When this is <see langword="null"/> or empty the
    /// placeholder is removed and nothing is added.
    /// </remarks>
    public IEnumerable<string>? Criteria { get; set; }

    /// <summary>
    /// Gets or sets the template for the feedback produced when the judge decides the original request has not been
    /// fully addressed. Leave <see langword="null"/> to fall back to
    /// <see cref="AIJudgeLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
    /// </summary>
    /// <remarks>
    /// Every occurrence of <see cref="AIJudgeLoopEvaluator.GapAnalysisPlaceholder"/> is replaced with the judge's gap
    /// analysis, or with the evaluator's fallback text when the judge supplies none.
    /// </remarks>
    public string? FeedbackMessageTemplate { get; set; }
}
|
||||
@@ -0,0 +1,78 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// A <see cref="LoopEvaluator"/> that ends the loop as soon as a configured marker string is found in the agent's
/// latest response; until then it asks for another iteration, with feedback telling the agent to keep working and to
/// emit the marker once finished.
/// </summary>
/// <remarks>
/// The feedback is produced from a template (see
/// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>). <see cref="CompletionMarkerPlaceholder"/>
/// is replaced with the configured marker and <see cref="LastResponsePlaceholder"/> with the text of the agent's latest
/// response. Delivery of the feedback, and any session reset, is the responsibility of the <see cref="LoopAgent"/>
/// that consumes this evaluator.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class CompletionMarkerLoopEvaluator : LoopEvaluator
{
    /// <summary>
    /// The token in <see cref="DefaultFeedbackMessageTemplate"/> (or in a custom
    /// <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/>) that is swapped for the configured
    /// completion marker.
    /// </summary>
    public const string CompletionMarkerPlaceholder = "{completion_marker}";

    /// <summary>
    /// The token in a custom <see cref="CompletionMarkerLoopEvaluatorOptions.FeedbackMessageTemplate"/> that is swapped
    /// for the text of the agent's latest response. The swap happens on every evaluation, which lets the feedback echo
    /// the agent's previous output — helpful when the consuming <see cref="LoopAgent"/> uses
    /// <see cref="LoopAgentOptions.FreshContextPerIteration"/> and the agent would otherwise have no record of it.
    /// </summary>
    public const string LastResponsePlaceholder = "{last_response}";

    /// <summary>The template used for the feedback when no custom template is configured.</summary>
    public const string DefaultFeedbackMessageTemplate =
        "Continue working on the request. When you have fully completed the task, end your response with the marker '" +
        CompletionMarkerPlaceholder + "' to indicate completion.";

    private readonly string _marker;
    private readonly string _feedbackTemplate;

    /// <summary>
    /// Initializes a new instance of the <see cref="CompletionMarkerLoopEvaluator"/> class.
    /// </summary>
    /// <param name="completionMarker">The marker whose presence in the agent's latest response text stops the loop.</param>
    /// <param name="options">Optional feedback configuration; defaults apply when <see langword="null"/>.</param>
    /// <exception cref="System.ArgumentException"><paramref name="completionMarker"/> is <see langword="null"/>, empty, or whitespace.</exception>
    public CompletionMarkerLoopEvaluator(string completionMarker, CompletionMarkerLoopEvaluatorOptions? options = null)
    {
        this._marker = Throw.IfNullOrWhitespace(completionMarker);

        // The marker never changes, so bake it into the template now. {last_response} varies per iteration and is
        // therefore resolved in EvaluateAsync.
        string template = options?.FeedbackMessageTemplate ?? DefaultFeedbackMessageTemplate;
        this._feedbackTemplate = template.Replace(CompletionMarkerPlaceholder, this._marker);
    }

    /// <inheritdoc />
    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(context);

        string responseText = context.LastResponse.Text;

        // Marker present: stop. Otherwise continue with the feedback, echoing the latest response where requested.
        LoopEvaluation evaluation = responseText.Contains(this._marker)
            ? LoopEvaluation.Stop()
            : LoopEvaluation.Continue(this._feedbackTemplate.Replace(LastResponsePlaceholder, responseText));

        return new ValueTask<LoopEvaluation>(evaluation);
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Configuration options consumed by <see cref="CompletionMarkerLoopEvaluator"/>.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class CompletionMarkerLoopEvaluatorOptions
{
    /// <summary>
    /// Gets or sets the template for the feedback produced while the completion marker has not appeared. Leave
    /// <see langword="null"/> to fall back to <see cref="CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate"/>.
    /// </summary>
    /// <remarks>
    /// Every occurrence of <see cref="CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder"/> is replaced with the
    /// configured completion marker. Every occurrence of
    /// <see cref="CompletionMarkerLoopEvaluator.LastResponsePlaceholder"/> is replaced, on each evaluation, with the text
    /// of the agent's latest response — useful for echoing the agent's prior output back to it when the
    /// <see cref="LoopAgent"/> consuming the <see cref="CompletionMarkerLoopEvaluator"/> runs with a fresh context per
    /// iteration.
    /// </remarks>
    public string? FeedbackMessageTemplate { get; set; }
}
|
||||
@@ -0,0 +1,40 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// A <see cref="LoopEvaluator"/> whose decision (re-invoke or stop, and with what feedback) comes from a
/// caller-supplied callback.
/// </summary>
/// <remarks>
/// The callback is handed the complete <see cref="LoopContext"/> and returns a <see cref="LoopEvaluation"/>, so it
/// controls both whether the loop continues and which feedback, if any, is provided. That makes this the most
/// flexible evaluator.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class DelegateLoopEvaluator : LoopEvaluator
{
    private readonly Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> _callback;

    /// <summary>
    /// Initializes a new instance of the <see cref="DelegateLoopEvaluator"/> class.
    /// </summary>
    /// <param name="evaluate">The callback that decides whether the agent is re-invoked and which feedback to provide.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluate"/> is <see langword="null"/>.</exception>
    public DelegateLoopEvaluator(Func<LoopContext, CancellationToken, ValueTask<LoopEvaluation>> evaluate)
    {
        this._callback = Throw.IfNull(evaluate);
    }

    /// <inheritdoc />
    public override ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default)
        // The context is validated before the callback runs; the callback's result is returned unchanged.
        => this._callback(Throw.IfNull(context), cancellationToken);
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.ComponentModel;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// The structured verdict that <see cref="AIJudgeLoopEvaluator"/> requests from its judge chat client.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
internal sealed class JudgeVerdict
{
    /// <summary>
    /// Gets or sets a value indicating whether the agent fully addressed the user's original request.
    /// </summary>
    [Description("True if the agent has fully addressed the original request, otherwise false.")]
    public bool Answered { get; set; }

    /// <summary>
    /// Gets or sets the judge's explanation of what remains to be done when the request was not fully addressed.
    /// Defaults to an empty string.
    /// </summary>
    [Description("When 'answered' is false, explain what is still missing or what work remains to fully address the original request.")]
    public string GapAnalysis { get; set; } = "";
}
|
||||
@@ -0,0 +1,548 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
|
||||
/// A <see cref="DelegatingAIAgent"/> that re-invokes the wrapped agent in a loop until the configured
|
||||
/// <see cref="LoopEvaluator"/> set decides to stop.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// After each run of the wrapped agent, the configured evaluators are asked whether to re-invoke the agent and what
|
||||
/// feedback to carry forward. This enables patterns such as iterative refinement, working through a task list, or
|
||||
/// judging whether the original request was answered. Out-of-the-box evaluators include
|
||||
/// <see cref="AIJudgeLoopEvaluator"/>, <see cref="CompletionMarkerLoopEvaluator"/>, and
|
||||
/// <see cref="DelegateLoopEvaluator"/>.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// When multiple evaluators are supplied they are evaluated in order after each iteration. The first evaluator that
|
||||
/// asks to re-invoke wins: its feedback drives the next iteration and the remaining evaluators are not evaluated. The
|
||||
/// loop stops only when every evaluator asks to stop. Consequently, evaluator order is priority order and
|
||||
/// <see cref="LoopEvaluation.Stop"/> means "this evaluator does not request continuation" rather than a veto that
|
||||
/// terminates the loop; place stop-only guards accordingly.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The caller's initial messages are sent to the wrapped agent exactly once. By default (when
|
||||
/// <see cref="LoopAgentOptions.FreshContextPerIteration"/> is <see langword="false"/>) the loop reuses a single session
|
||||
/// and sends only the winning evaluator's feedback as the next input, letting the agent continue from session history.
|
||||
/// When <see cref="LoopAgentOptions.FreshContextPerIteration"/> is <see langword="true"/>, each re-invocation restarts
|
||||
/// from the original input messages plus an aggregated feedback log, and the session is reset for each iteration: a
|
||||
/// loop-owned session is created anew, while a caller-supplied session is restored from a snapshot taken at the start
|
||||
/// of the run (so the wrapped agent must support session serialization). An evaluator may instead supply the exact next
|
||||
/// messages via <see cref="LoopEvaluation.ContinueWithMessages"/>, bypassing this construction.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The loop is bounded by a global safety cap (<see cref="LoopAgentOptions.MaxIterations"/>) regardless of the
|
||||
/// evaluators. If an iteration produces a pending tool-approval request, the loop stops and returns that response to
|
||||
/// the caller rather than attempting to resolve the approval automatically.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// A non-streaming run returns, by default, a single <see cref="AgentResponse"/> that aggregates the full transcript
|
||||
/// in order: the on-behalf-of messages the loop injected for each re-invocation followed by that iteration's response
|
||||
/// messages. The caller's original input messages are not echoed. Set
|
||||
/// <see cref="LoopAgentOptions.NonStreamingReturnsLastResponseOnly"/> to instead return only the final iteration's
|
||||
/// response. A streaming run always yields every iteration's updates, emitting the injected on-behalf-of messages as
|
||||
/// updates before each re-invocation. The injected messages can be attributed with
|
||||
/// <see cref="LoopAgentOptions.OnBehalfOfAuthorName"/>, or omitted from the surfaced output entirely with
|
||||
/// <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/>.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
|
||||
public sealed class LoopAgent : DelegatingAIAgent
|
||||
{
|
||||
/// <summary>The default value used for <see cref="LoopAgentOptions.MaxIterations"/> when none is specified.</summary>
|
||||
public const int DefaultMaxIterations = 10;
|
||||
|
||||
private readonly IReadOnlyList<LoopEvaluator> _evaluators;
|
||||
private readonly int _maxIterations;
|
||||
private readonly bool _freshContextPerIteration;
|
||||
private readonly string? _onBehalfOfAuthorName;
|
||||
private readonly bool _excludeOnBehalfOfMessages;
|
||||
private readonly bool _nonStreamingReturnsLastResponseOnly;
|
||||
private readonly System.Func<AgentSession, CancellationToken, ValueTask>? _sessionCreatedCallback;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="LoopAgent"/> class driven by exactly one evaluator.
/// </summary>
/// <param name="innerAgent">The agent that is invoked repeatedly by the loop.</param>
/// <param name="evaluator">The <see cref="LoopEvaluator"/> deciding whether the agent is re-invoked.</param>
/// <param name="options">Optional loop configuration; defaults apply when <see langword="null"/>.</param>
/// <param name="loggerFactory">Optional factory from which the loop's logger is created.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="innerAgent"/> or <paramref name="evaluator"/> is <see langword="null"/>.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><see cref="LoopAgentOptions.MaxIterations"/> is less than 1.</exception>
public LoopAgent(AIAgent innerAgent, LoopEvaluator evaluator, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null)
    // Wrap the validated evaluator in a one-element sequence and defer to the multi-evaluator constructor.
    : this(innerAgent, new LoopEvaluator[] { Throw.IfNull(evaluator) }, options, loggerFactory)
{
}
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="LoopAgent"/> class driven by one or more evaluators.
/// </summary>
/// <param name="innerAgent">The agent that is invoked repeatedly by the loop.</param>
/// <param name="evaluators">
/// The <see cref="LoopEvaluator"/> instances, in priority order. After each iteration they are consulted in this
/// order and the first one requesting a re-invocation wins.
/// </param>
/// <param name="options">Optional loop configuration; defaults apply when <see langword="null"/>.</param>
/// <param name="loggerFactory">Optional factory from which the loop's logger is created.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="innerAgent"/> or <paramref name="evaluators"/> is <see langword="null"/>, or <paramref name="evaluators"/> contains a <see langword="null"/> element.</exception>
/// <exception cref="System.ArgumentException"><paramref name="evaluators"/> is empty.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><see cref="LoopAgentOptions.MaxIterations"/> is less than 1.</exception>
public LoopAgent(AIAgent innerAgent, IEnumerable<LoopEvaluator> evaluators, LoopAgentOptions? options = null, ILoggerFactory? loggerFactory = null)
    : base(innerAgent)
{
    // Copy the sequence into an array so the loop works against a fixed set of evaluators.
    LoopEvaluator[] snapshot = Throw.IfNull(evaluators).ToArray();
    if (snapshot.Length == 0)
    {
        throw new System.ArgumentException("At least one evaluator must be supplied.", nameof(evaluators));
    }

    // A null element is reported against the 'evaluators' parameter.
    for (int index = 0; index < snapshot.Length; index++)
    {
        _ = Throw.IfNull(snapshot[index], nameof(evaluators));
    }

    this._evaluators = snapshot;

    // Options are read once here; a null options object means "all defaults".
    this._maxIterations = Throw.IfLessThan(options?.MaxIterations ?? DefaultMaxIterations, 1);
    this._freshContextPerIteration = options?.FreshContextPerIteration is true;
    this._excludeOnBehalfOfMessages = options?.ExcludeOnBehalfOfMessages is true;
    this._nonStreamingReturnsLastResponseOnly = options?.NonStreamingReturnsLastResponseOnly is true;
    this._onBehalfOfAuthorName = options?.OnBehalfOfAuthorName;
    this._sessionCreatedCallback = options?.SessionCreatedCallback;

    ILoggerFactory factory = loggerFactory ?? NullLoggerFactory.Instance;
    this._logger = factory.CreateLogger<LoopAgent>();
}
|
||||
|
||||
/// <inheritdoc />
protected override async Task<AgentResponse> RunCoreAsync(
    IEnumerable<ChatMessage> messages,
    AgentSession? session = null,
    AgentRunOptions? options = null,
    CancellationToken cancellationToken = default)
{
    _ = Throw.IfNull(messages);

    // The caller's messages are captured once; they are the input of the first iteration only.
    IReadOnlyList<ChatMessage> initialMessages = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();

    // The loop always runs against a session: create (and announce) one when the caller gave none.
    bool callerSuppliedSession = session is not null;
    if (session is null)
    {
        session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
        await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false);
    }

    // Fresh context over a caller-supplied session: serialize the session before the first iteration touches it,
    // so later iterations can restart from that state (see CreateFreshIterationSessionAsync).
    JsonElement? pristineSnapshot = null;
    if (this._freshContextPerIteration && callerSuppliedSession)
    {
        pristineSnapshot = await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    LoopContext? context = null;
    List<string?> feedbackLog = [];

    // Full transcript across iterations: each iteration's surfaced on-behalf-of input followed by its response
    // messages.
    List<ChatMessage> transcript = [];

    // Input for the upcoming iteration and the loop-synthesized messages among it that are surfaced to the caller
    // (empty for the first iteration).
    IEnumerable<ChatMessage> nextInput = initialMessages;
    IReadOnlyList<ChatMessage> nextSurfaced = [];

    for (int iteration = 1; ; iteration++)
    {
        // Once the context exists its session is authoritative (it may have been replaced for a fresh context).
        AgentResponse response = await this.InnerAgent
            .RunAsync(nextInput, context?.Session ?? session, options, cancellationToken)
            .ConfigureAwait(false);

        // On-behalf-of input first, then the response it elicited.
        transcript.AddRange(nextSurfaced);
        transcript.AddRange(response.Messages);

        // The context is created after the first run so that LastResponse is never null. Evaluators see the
        // feedback log through a read-only wrapper that still reflects entries the loop appends later.
        context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() };
        context.Iteration = iteration;
        context.LastResponse = response;

        // A pending tool approval is handed back to the caller instead of being resolved here.
        if (HasPendingApprovalRequests(response))
        {
            return this.BuildResult(response, transcript);
        }

        // Global safety cap: applies before the evaluators are consulted.
        if (iteration >= this._maxIterations)
        {
            this.LogMaxIterationsReached(iteration);
            return this.BuildResult(response, transcript);
        }

        // Consult the evaluators; the run ends when none of them requests another iteration.
        LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, pristineSnapshot, cancellationToken).ConfigureAwait(false);
        if (!step.ShouldContinue)
        {
            return this.BuildResult(response, transcript);
        }

        nextInput = step.Messages;
        nextSurfaced = step.SurfacedMessages;
    }
}
|
||||
|
||||
/// <inheritdoc />
protected override async IAsyncEnumerable<AgentResponseUpdate> RunCoreStreamingAsync(
    IEnumerable<ChatMessage> messages,
    AgentSession? session = null,
    AgentRunOptions? options = null,
    [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    _ = Throw.IfNull(messages);

    // The caller's messages are captured once; they are the input of the first iteration only.
    IReadOnlyList<ChatMessage> initialMessages = messages as IReadOnlyList<ChatMessage> ?? messages.ToList();

    // The loop always runs against a session: create (and announce) one when the caller gave none.
    bool callerSuppliedSession = session is not null;
    if (session is null)
    {
        session = await this.InnerAgent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
        await this.NotifyNewSessionAsync(session, cancellationToken).ConfigureAwait(false);
    }

    // Fresh context over a caller-supplied session: serialize the session before the first iteration touches it,
    // so later iterations can restart from that state (see CreateFreshIterationSessionAsync).
    JsonElement? pristineSnapshot = null;
    if (this._freshContextPerIteration && callerSuppliedSession)
    {
        pristineSnapshot = await this.InnerAgent.SerializeSessionAsync(session, cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    LoopContext? context = null;
    List<string?> feedbackLog = [];

    // Input for the upcoming iteration and the loop-synthesized messages among it that are surfaced to the caller
    // (empty for the first iteration).
    IEnumerable<ChatMessage> nextInput = initialMessages;
    IReadOnlyList<ChatMessage> nextSurfaced = [];

    for (int iteration = 1; ; iteration++)
    {
        // Updates are forwarded to the caller as they arrive and also collected, so the iteration's complete
        // response can be assembled for evaluation afterwards.
        List<AgentResponseUpdate> collected = [];

        // The surfaced messages are emitted just before the first inner update so they can carry that update's
        // ResponseId and stay grouped with the iteration they drive.
        bool flushPending = nextSurfaced.Count > 0;

        // Once the context exists its session is authoritative (it may have been replaced for a fresh context).
        await foreach (AgentResponseUpdate update in this.InnerAgent.RunStreamingAsync(nextInput, context?.Session ?? session, options, cancellationToken).ConfigureAwait(false))
        {
            if (flushPending)
            {
                for (int index = 0; index < nextSurfaced.Count; index++)
                {
                    yield return CreateOnBehalfOfUpdate(nextSurfaced[index], update.ResponseId);
                }

                flushPending = false;
            }

            collected.Add(update);
            yield return update;
        }

        // No inner update arrived, so nothing was flushed: emit the surfaced messages now under a generated
        // ResponseId so that they still share one.
        if (flushPending)
        {
            string fallbackResponseId = System.Guid.NewGuid().ToString("N");
            for (int index = 0; index < nextSurfaced.Count; index++)
            {
                yield return CreateOnBehalfOfUpdate(nextSurfaced[index], fallbackResponseId);
            }
        }

        AgentResponse response = collected.ToAgentResponse();

        // The context is created after the first run so that LastResponse is never null. Evaluators see the
        // feedback log through a read-only wrapper that still reflects entries the loop appends later.
        context ??= new LoopContext(this.InnerAgent, session, initialMessages, response, options) { Feedback = feedbackLog.AsReadOnly() };
        context.Iteration = iteration;
        context.LastResponse = response;

        // A pending tool approval ends the stream; it is not resolved here.
        if (HasPendingApprovalRequests(response))
        {
            yield break;
        }

        // Global safety cap: applies before the evaluators are consulted.
        if (iteration >= this._maxIterations)
        {
            this.LogMaxIterationsReached(iteration);
            yield break;
        }

        // Consult the evaluators; the stream ends when none of them requests another iteration.
        LoopNextStep step = await this.EvaluateAndBuildNextAsync(context, feedbackLog, pristineSnapshot, cancellationToken).ConfigureAwait(false);
        if (!step.ShouldContinue)
        {
            yield break;
        }

        nextInput = step.Messages;
        nextSurfaced = step.SurfacedMessages;
    }
}
|
||||
|
||||
/// <summary>
/// Consults the evaluators in order and, when one of them requests a re-invocation, prepares the input of the next
/// iteration following the loop's feedback and fresh-context policy.
/// </summary>
/// <param name="context">The loop context describing the iteration that just finished; its session may be replaced here.</param>
/// <param name="feedbackLog">The loop's feedback log; one entry is appended when the loop continues.</param>
/// <param name="initialSessionSnapshot">The start-of-run session snapshot, passed through to the fresh-session creation.</param>
/// <param name="cancellationToken">Token used to cancel the evaluation.</param>
/// <returns>A stop step when no evaluator requests a re-invocation; otherwise a continue step carrying the next messages.</returns>
private async ValueTask<LoopNextStep> EvaluateAndBuildNextAsync(LoopContext context, List<string?> feedbackLog, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken)
{
    // First evaluator requesting a re-invocation wins; the ones after it are not evaluated.
    LoopEvaluation? winner = null;
    for (int index = 0; index < this._evaluators.Count && winner is null; index++)
    {
        LoopEvaluation candidate = await this._evaluators[index].EvaluateAsync(context, cancellationToken).ConfigureAwait(false);
        if (candidate.ShouldReinvoke)
        {
            winner = candidate;
        }
    }

    if (winner is null)
    {
        // Nobody asked to continue.
        return LoopNextStep.Stop();
    }

    // With a fresh context the session is replaced before the next iteration, whichever way the next input is
    // built (feedback or explicit messages).
    if (this._freshContextPerIteration)
    {
        context.Session = await this.CreateFreshIterationSessionAsync(context, initialSessionSnapshot, cancellationToken).ConfigureAwait(false);
    }

    // Exactly one log entry per re-invoked iteration, whatever the winner's Feedback value is, so the last entry
    // always belongs to the most recent re-invocation.
    feedbackLog.Add(winner.Feedback);

    // Explicit messages from the evaluator are sent as they are and surfaced as they are.
    if (winner.Messages is { } explicitMessages)
    {
        return LoopNextStep.Continue(explicitMessages, this.Surfaced(explicitMessages));
    }

    var next = this.BuildNextMessages(context, feedbackLog);
    return LoopNextStep.Continue(next.Messages, this.Surfaced(next.Surfaced));
}
|
||||
|
||||
/// <summary>
/// Filters the on-behalf-of messages that will be shown to the caller according to
/// <see cref="LoopAgentOptions.ExcludeOnBehalfOfMessages"/>.
/// </summary>
/// <param name="surfaced">The candidate messages to surface.</param>
/// <returns>An empty list when on-behalf-of messages are excluded; otherwise <paramref name="surfaced"/> unchanged.</returns>
private IReadOnlyList<ChatMessage> Surfaced(IReadOnlyList<ChatMessage> surfaced)
{
    if (this._excludeOnBehalfOfMessages)
    {
        return [];
    }

    return surfaced;
}
|
||||
|
||||
/// <summary>
/// Wraps a surfaced on-behalf-of message in a streaming update. The update carries the supplied
/// <paramref name="responseId"/> so downstream mergers group it with the driven iteration, and it always has a
/// non-empty <see cref="AgentResponseUpdate.MessageId"/>. <see cref="AgentResponseUpdate.AgentId"/> is
/// intentionally not set because the loop, not the wrapped agent, synthesized the message.
/// </summary>
/// <param name="message">The on-behalf-of message to surface.</param>
/// <param name="responseId">The response id of the iteration the update should be grouped with.</param>
/// <returns>The streaming update representing <paramref name="message"/>.</returns>
private static AgentResponseUpdate CreateOnBehalfOfUpdate(ChatMessage message, string? responseId)
{
    // Reuse the message's own id when it has one; otherwise mint a fresh id so the update is never id-less.
    string updateMessageId = message.MessageId is { Length: > 0 } existingId
        ? existingId
        : System.Guid.NewGuid().ToString("N");

    return new AgentResponseUpdate(message.Role, message.Contents)
    {
        AuthorName = message.AuthorName,
        MessageId = updateMessageId,
        ResponseId = responseId,
    };
}
|
||||
|
||||
/// <summary>
/// Assembles the input for the next iteration of the wrapped agent and, alongside it, the portion of that input
/// that is eligible to be surfaced to the caller (only the feedback message synthesized by the loop). Caller
/// input that is replayed in fresh-context mode is never part of the surfaced portion.
/// </summary>
/// <param name="context">The loop context supplying the initial messages to replay in fresh-context mode.</param>
/// <param name="feedback">The feedback log, one entry per re-invoked iteration (entries may be <see langword="null"/>).</param>
/// <returns>The messages to send and the subset to surface.</returns>
private (List<ChatMessage> Messages, List<ChatMessage> Surfaced) BuildNextMessages(LoopContext context, List<string?> feedback)
{
    List<ChatMessage> outgoing = [];
    List<ChatMessage> visible = [];

    if (!this._freshContextPerIteration)
    {
        // Reused session: earlier turns already live in the session, so only the newest feedback is sent, as-is.
        // If the newest entry is empty, nothing is sent and the agent simply continues from its history.
        string? newest = feedback.Count == 0 ? null : feedback[feedback.Count - 1];
        if (!string.IsNullOrWhiteSpace(newest))
        {
            ChatMessage note = new(ChatRole.User, newest)
            {
                AuthorName = this._onBehalfOfAuthorName,
                MessageId = System.Guid.NewGuid().ToString("N"),
            };

            outgoing.Add(note);
            visible.Add(note);
        }

        return (outgoing, visible);
    }

    // Fresh context: replay the original task, then append one message aggregating all feedback so far.
    // Only that aggregated message is surfaced; the replayed caller input is not.
    outgoing.AddRange(context.InitialMessages);

    if (this.BuildAggregatedFeedbackMessage(feedback) is { } aggregated)
    {
        outgoing.Add(aggregated);
        visible.Add(aggregated);
    }

    return (outgoing, visible);
}
|
||||
|
||||
/// <summary>
/// Combines every non-blank feedback entry into a single user message formatted as a bulleted list under a
/// "## Feedback" heading.
/// </summary>
/// <param name="feedback">The feedback log; <see langword="null"/>, empty, and whitespace entries are skipped.</param>
/// <returns>The aggregated message, or <see langword="null"/> when no entry contained any feedback.</returns>
private ChatMessage? BuildAggregatedFeedbackMessage(IReadOnlyList<string?> feedback)
{
    var text = new StringBuilder("## Feedback\n");
    int bulletCount = 0;

    for (int i = 0; i < feedback.Count; i++)
    {
        string? item = feedback[i];
        if (string.IsNullOrWhiteSpace(item))
        {
            continue;
        }

        text.Append("\n- ").Append(item);
        bulletCount++;
    }

    if (bulletCount == 0)
    {
        return null;
    }

    return new ChatMessage(ChatRole.User, text.ToString())
    {
        AuthorName = this._onBehalfOfAuthorName,
        MessageId = System.Guid.NewGuid().ToString("N"),
    };
}
|
||||
|
||||
/// <summary>
/// Builds the value returned from a non-streaming run. Depending on configuration this is either the last
/// iteration's response itself, or a new response that holds the whole transcript and copies its metadata
/// from the last iteration's response.
/// </summary>
/// <param name="lastResponse">The response produced by the final iteration.</param>
/// <param name="transcript">The accumulated messages for the run.</param>
/// <returns>The response to hand back to the caller.</returns>
private AgentResponse BuildResult(AgentResponse lastResponse, List<ChatMessage> transcript)
    => this._nonStreamingReturnsLastResponseOnly
        ? lastResponse
        : new AgentResponse(transcript)
        {
            AgentId = lastResponse.AgentId,
            ResponseId = lastResponse.ResponseId,
            CreatedAt = lastResponse.CreatedAt,
            FinishReason = lastResponse.FinishReason,
            Usage = lastResponse.Usage,
            AdditionalProperties = lastResponse.AdditionalProperties,
            ContinuationToken = lastResponse.ContinuationToken,
        };
|
||||
|
||||
/// <summary>
/// Determines whether any message in the response contains a <see cref="ToolApprovalRequestContent"/> item.
/// </summary>
/// <param name="response">The response whose messages are scanned.</param>
/// <returns><see langword="true"/> as soon as one approval request is found; otherwise <see langword="false"/>.</returns>
private static bool HasPendingApprovalRequests(AgentResponse response)
{
    var messages = response.Messages;
    for (int m = 0; m < messages.Count; m++)
    {
        var contents = messages[m].Contents;
        for (int c = 0; c < contents.Count; c++)
        {
            if (contents[c] is ToolApprovalRequestContent)
            {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Writes an information-level log entry recording that the loop stopped at its iteration cap.
/// </summary>
/// <param name="iteration">The iteration count reported in the log entry.</param>
private void LogMaxIterationsReached(int iteration)
{
    // Skip the call entirely when information-level logging is off.
    if (!this._logger.IsEnabled(LogLevel.Information))
    {
        return;
    }

    this._logger.LogInformation("LoopAgent reached the maximum of {MaxIterations} iterations and stopped.", iteration);
}
|
||||
|
||||
/// <summary>
/// Produces the session for the next iteration when a fresh context is requested. If a start-of-run snapshot
/// exists (caller-supplied session), a new clone is deserialized from it; if there is no snapshot (loop-owned
/// session), a brand-new session is created. The session-created callback is then notified.
/// </summary>
/// <param name="context">The loop context whose agent creates a new session when no snapshot exists.</param>
/// <param name="initialSessionSnapshot">The serialized start-of-run session, or <see langword="null"/> when none was taken.</param>
/// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// <returns>The session to use for the next iteration.</returns>
private async ValueTask<AgentSession> CreateFreshIterationSessionAsync(LoopContext context, JsonElement? initialSessionSnapshot, CancellationToken cancellationToken)
{
    AgentSession fresh;
    if (initialSessionSnapshot.HasValue)
    {
        fresh = await this.InnerAgent.DeserializeSessionAsync(initialSessionSnapshot.Value, cancellationToken: cancellationToken).ConfigureAwait(false);
    }
    else
    {
        fresh = await context.Agent.CreateSessionAsync(cancellationToken).ConfigureAwait(false);
    }

    await this.NotifyNewSessionAsync(fresh, cancellationToken).ConfigureAwait(false);
    return fresh;
}
|
||||
|
||||
/// <summary>
/// Passes a session the loop has just created to the configured
/// <see cref="LoopAgentOptions.SessionCreatedCallback"/>, when one is set, so the caller can track the latest
/// session. Does nothing when no callback is configured.
/// </summary>
/// <param name="session">The newly created session.</param>
/// <param name="cancellationToken">The token forwarded to the callback.</param>
private async ValueTask NotifyNewSessionAsync(AgentSession session, CancellationToken cancellationToken)
{
    if (this._sessionCreatedCallback is null)
    {
        return;
    }

    await this._sessionCreatedCallback(session, cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Captures what the loop does next: either stop, or continue by sending a set of messages to the wrapped agent.
/// </summary>
private readonly struct LoopNextStep
{
    /// <summary>Gets a value indicating whether the loop should run another iteration.</summary>
    public bool ShouldContinue { get; }

    /// <summary>Gets every message that will be sent to the wrapped agent on the next iteration.</summary>
    public IReadOnlyList<ChatMessage> Messages { get; }

    /// <summary>
    /// Gets the part of <see cref="Messages"/> that the loop produced on the caller's behalf (feedback or
    /// evaluator-supplied messages) and that should be surfaced to the caller. Replayed caller input is not included.
    /// </summary>
    public IReadOnlyList<ChatMessage> SurfacedMessages { get; }

    private LoopNextStep(bool shouldContinue, IReadOnlyList<ChatMessage> messages, IReadOnlyList<ChatMessage> surfacedMessages)
    {
        this.ShouldContinue = shouldContinue;
        this.Messages = messages;
        this.SurfacedMessages = surfacedMessages;
    }

    /// <summary>Creates a step that ends the loop; both message lists are empty.</summary>
    public static LoopNextStep Stop()
    {
        return new LoopNextStep(shouldContinue: false, messages: [], surfacedMessages: []);
    }

    /// <summary>Creates a step that runs another iteration with the given messages.</summary>
    public static LoopNextStep Continue(IReadOnlyList<ChatMessage> messages, IReadOnlyList<ChatMessage> surfacedMessages)
    {
        return new LoopNextStep(shouldContinue: true, messages: messages, surfacedMessages: surfacedMessages);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Configuration settings that control how a <see cref="LoopAgent"/> drives its wrapped agent.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopAgentOptions
{
    /// <summary>
    /// Gets or sets the hard ceiling on how many times a single loop run may invoke the wrapped agent. A
    /// <see langword="null"/> value selects <see cref="LoopAgent.DefaultMaxIterations"/>.
    /// </summary>
    /// <remarks>
    /// The ceiling holds no matter which <see cref="LoopEvaluator"/> instances are configured: an evaluator can end
    /// a run sooner, but none can push the run past this limit. Increase the value to permit longer loops.
    /// </remarks>
    public int? MaxIterations { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether every re-invocation begins from a clean context, meaning the
    /// original input messages followed by an aggregated feedback log, instead of appending only the latest
    /// feedback to the existing conversation. The default is <see langword="false"/>.
    /// </summary>
    /// <remarks>
    /// <para>
    /// When enabled, the input <em>messages</em> are rebuilt for each iteration and the session is reset before each
    /// re-invocation, so that earlier conversation history does not carry over. A loop-owned session is replaced
    /// with a newly created one each iteration. For a caller-supplied session, <see cref="LoopAgent"/> serializes
    /// it once when the run starts and deserializes that snapshot into a fresh clone before each re-invocation,
    /// which requires the wrapped agent to support session serialization. The first iteration still uses the
    /// session the caller supplied.
    /// </para>
    /// <para>
    /// Cloning yields a fresh context only if the chat history storage mechanism supports cloning. The default
    /// in-memory storage does, because the messages are serialized as part of the snapshot.
    /// </para>
    /// <para>
    /// If the Conversations service is used, which stores messages in a single threaded list, the cloned session
    /// still contains the full message history: the snapshot captures only an id reference to the conversation,
    /// not the individual messages.
    /// </para>
    /// <para>
    /// If responses with response ids are used, cloning works well because response ids are forkable: each new
    /// response has its own id and is based on the id of the previous response.
    /// </para>
    /// <para>
    /// When an evaluator returns explicit messages through <see cref="LoopEvaluation.ContinueWithMessages"/>, the
    /// session is still reset (a fresh or cloned session is used). Only the rebuilding of the input messages from
    /// the feedback log is skipped, since the evaluator's messages are sent verbatim.
    /// </para>
    /// </remarks>
    public bool FreshContextPerIteration { get; set; }

    /// <summary>
    /// Gets or sets the author name applied to the "on-behalf-of" messages that the loop synthesizes and sends to
    /// the wrapped agent on re-invocations. Use <see langword="null"/> (the default) to leave them unattributed.
    /// </summary>
    /// <remarks>
    /// On re-invocation the loop sends feedback messages on the caller's behalf. Giving them a name (for example
    /// <c>"loop"</c>) lets callers and the wrapped agent tell these autonomous messages apart from the caller's own
    /// turns. The name is applied only to messages the loop itself synthesizes. Messages an evaluator supplies via
    /// <see cref="LoopEvaluation.ContinueWithMessages"/> are left as they are, and the caller's original input
    /// messages are never modified.
    /// </remarks>
    public string? OnBehalfOfAuthorName { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether the on-behalf-of messages injected for re-invocations are left out
    /// of the output returned to the caller. The default is <see langword="false"/>.
    /// </summary>
    /// <remarks>
    /// With the default value, a streaming run emits the injected feedback or evaluator-supplied messages as updates
    /// ahead of each re-invocation, and a non-streaming run includes them in the aggregated transcript, making the
    /// loop's autonomous actions visible. Set the value to <see langword="true"/> to return only the wrapped agent's
    /// responses; the messages are still sent to the wrapped agent. The setting has no effect when
    /// <see cref="NonStreamingReturnsLastResponseOnly"/> makes a non-streaming run return only the final response.
    /// </remarks>
    public bool ExcludeOnBehalfOfMessages { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether a non-streaming run returns just the final iteration's response
    /// rather than the aggregated transcript of all iterations. The default is <see langword="false"/>.
    /// </summary>
    /// <remarks>
    /// By default, a non-streaming <see cref="LoopAgent"/> run returns one <see cref="AgentResponse"/> that
    /// aggregates, in order, the on-behalf-of messages the loop injected and the responses from every iteration,
    /// mirroring the sequence of updates a streaming run yields. Set the value to <see langword="true"/> to return
    /// only the last iteration's <see cref="AgentResponse"/>. Streaming runs are unaffected and always yield every
    /// iteration's updates.
    /// </remarks>
    public bool NonStreamingReturnsLastResponseOnly { get; set; }

    /// <summary>
    /// Gets or sets an optional callback that is invoked each time <see cref="LoopAgent"/> creates a new session,
    /// allowing the caller to capture the latest session (for example to continue the conversation once the loop
    /// has finished). The default is <see langword="null"/>.
    /// </summary>
    /// <remarks>
    /// The callback receives every session the loop itself creates: the initial loop-owned session (when the caller
    /// supplies none) and, with <see cref="FreshContextPerIteration"/> enabled, each session created for a
    /// re-invocation, whether newly created or deserialized as a fresh clone of the caller's original session. It
    /// is not invoked for a caller-supplied session, which the caller already holds. When invoked more than once,
    /// the most recent invocation carries the session the loop is currently using.
    /// </remarks>
    public Func<AgentSession, CancellationToken, ValueTask>? SessionCreatedCallback { get; set; }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Carries the state of one <see cref="LoopAgent"/> run that a <see cref="LoopEvaluator"/> consults when deciding
/// whether the wrapped agent should be re-invoked and what feedback to give it.
/// </summary>
/// <remarks>
/// One <see cref="LoopContext"/> is created per <see cref="LoopAgent"/> run and reused for every iteration;
/// <see cref="Iteration"/> and <see cref="LastResponse"/> are refreshed before each
/// <see cref="LoopEvaluator.EvaluateAsync"/> call. Evaluators are expected to be stateless and may be shared by
/// concurrent runs, so per-run mutable state belongs on this context (for example in
/// <see cref="AdditionalProperties"/>) and not in evaluator fields.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopContext
{
    /// <summary>
    /// Initializes a new instance of the <see cref="LoopContext"/> class.
    /// </summary>
    /// <param name="agent">The wrapped <see cref="AIAgent"/> being looped.</param>
    /// <param name="session">The <see cref="AgentSession"/> the loop runs against.</param>
    /// <param name="initialMessages">The messages supplied for the loop's first iteration.</param>
    /// <param name="lastResponse">The <see cref="AgentResponse"/> from the iteration that just finished.</param>
    /// <param name="runOptions">The <see cref="AgentRunOptions"/> given to the loop run, if any.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="agent"/>, <paramref name="session"/>, <paramref name="initialMessages"/>, or
    /// <paramref name="lastResponse"/> is <see langword="null"/>.
    /// </exception>
    public LoopContext(
        AIAgent agent,
        AgentSession session,
        IReadOnlyList<ChatMessage> initialMessages,
        AgentResponse lastResponse,
        AgentRunOptions? runOptions = null)
    {
        // Validate in parameter order before assigning anything.
        _ = Throw.IfNull(agent);
        _ = Throw.IfNull(session);
        _ = Throw.IfNull(initialMessages);
        _ = Throw.IfNull(lastResponse);

        this.Agent = agent;
        this.Session = session;
        this.InitialMessages = initialMessages;
        this.LastResponse = lastResponse;
        this.RunOptions = runOptions;
    }

    /// <summary>Gets the wrapped <see cref="AIAgent"/> being looped.</summary>
    public AIAgent Agent { get; }

    /// <summary>Gets the <see cref="AgentSession"/> the loop runs against.</summary>
    /// <remarks>
    /// If the caller provides no session, <see cref="LoopAgent"/> creates one up front. By default that one session
    /// is shared by all iterations, which keeps the conversation continuous and avoids replaying the original
    /// request. With <see cref="LoopAgentOptions.FreshContextPerIteration"/> enabled, <see cref="LoopAgent"/> resets
    /// the session before each re-invocation: a loop-owned session is created anew, and a caller-supplied session
    /// is restored by deserializing a fresh clone from a snapshot taken when the run started.
    /// </remarks>
    public AgentSession Session { get; internal set; }

    /// <summary>Gets the messages supplied for the loop's first iteration.</summary>
    public IReadOnlyList<ChatMessage> InitialMessages { get; }

    /// <summary>Gets the <see cref="AgentRunOptions"/> given to the loop run, if any.</summary>
    public AgentRunOptions? RunOptions { get; }

    /// <summary>Gets how many agent runs have completed so far (1-based after the first run).</summary>
    public int Iteration { get; internal set; }

    /// <summary>Gets the <see cref="AgentResponse"/> from the iteration that just finished.</summary>
    public AgentResponse LastResponse { get; internal set; }

    /// <summary>
    /// Gets the feedback collected so far, in order, with one entry for each re-invoked iteration.
    /// </summary>
    /// <remarks>
    /// An entry holds the feedback from the evaluator that requested that re-invocation, or
    /// <see langword="null"/> if the iteration had no feedback string (for instance a plain
    /// <see cref="LoopEvaluation.Continue(string)"/> without text, or a
    /// <see cref="LoopEvaluation.ContinueWithMessages"/> that supplied explicit messages instead). Exactly one entry
    /// is recorded per re-invoked iteration in every mode, so the final entry always belongs to the most recent
    /// re-invoked iteration. <see cref="LoopAgent"/> owns and populates this log; evaluators may read it to reason
    /// about earlier feedback.
    /// </remarks>
    public IReadOnlyList<string?> Feedback { get; internal set; } = [];

    /// <summary>
    /// Gets a mutable property bag of per-run state that persists across iterations and is visible to all evaluators.
    /// </summary>
    /// <remarks>
    /// The dictionary belongs to the loop run rather than to any evaluator instance, which lets evaluators stay
    /// stateless. Evaluators can store arbitrary per-run state here under a collision-resistant key.
    /// </remarks>
    public AdditionalPropertiesDictionary AdditionalProperties { get; } = new();
}
|
||||
@@ -0,0 +1,86 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
using Microsoft.Shared.Diagnostics;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// The outcome a <see cref="LoopEvaluator"/> returns after an agent iteration. It states whether the
/// <see cref="LoopAgent"/> should invoke the wrapped agent again and may carry feedback or explicit messages for
/// the next iteration.
/// </summary>
/// <remarks>
/// An evaluator only decides whether to continue or stop and what to carry forward. Typically it returns a
/// feedback string and leaves it to the <see cref="LoopAgent"/> to turn that feedback into the next input (and to
/// decide whether the session is reset). When full control is needed, <see cref="ContinueWithMessages"/> provides
/// the exact messages to send next, bypassing the loop's feedback and message construction.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public sealed class LoopEvaluation
{
    // Shared instance returned by Stop(); a stop evaluation carries no feedback and no messages.
    private static readonly LoopEvaluation s_stop = new(shouldReinvoke: false, feedback: null, messages: null);

    private LoopEvaluation(bool shouldReinvoke, string? feedback, IReadOnlyList<ChatMessage>? messages)
    {
        this.ShouldReinvoke = shouldReinvoke;
        this.Feedback = feedback;
        this.Messages = messages;
    }

    /// <summary>Gets a value indicating whether the loop should invoke the wrapped agent again.</summary>
    public bool ShouldReinvoke { get; }

    /// <summary>
    /// Gets the feedback explaining what is missing or what the agent should do next, or <see langword="null"/>
    /// if there is none.
    /// </summary>
    /// <remarks>Only meaningful when <see cref="ShouldReinvoke"/> is <see langword="true"/>.</remarks>
    public string? Feedback { get; }

    /// <summary>
    /// Gets the explicit messages for the next iteration, or <see langword="null"/> if the loop should derive the
    /// next input from feedback.
    /// </summary>
    /// <remarks>
    /// When this is not <see langword="null"/>, the <see cref="LoopAgent"/> sends the messages verbatim and skips
    /// its own feedback and message construction. The session is still reset when
    /// <see cref="LoopAgentOptions.FreshContextPerIteration"/> is enabled. Only meaningful when
    /// <see cref="ShouldReinvoke"/> is <see langword="true"/>.
    /// </remarks>
    internal IReadOnlyList<ChatMessage>? Messages { get; }

    /// <summary>Creates an evaluation that ends the loop and returns the latest response to the caller.</summary>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="false"/>.</returns>
    public static LoopEvaluation Stop()
    {
        return s_stop;
    }

    /// <summary>Creates an evaluation that re-invokes the wrapped agent, optionally with feedback.</summary>
    /// <param name="feedback">
    /// Optional feedback for the next iteration. A <see langword="null"/>, empty, or whitespace-only value is
    /// treated as no feedback.
    /// </param>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="true"/>.</returns>
    public static LoopEvaluation Continue(string? feedback = null)
    {
        // Blank feedback is normalized to null so consumers only need a null check.
        string? normalized = string.IsNullOrWhiteSpace(feedback) ? null : feedback;
        return new LoopEvaluation(shouldReinvoke: true, feedback: normalized, messages: null);
    }

    /// <summary>
    /// Creates an evaluation that re-invokes the wrapped agent with exactly the given messages, bypassing the
    /// loop's feedback and message construction.
    /// </summary>
    /// <param name="messages">The messages to send to the wrapped agent on the next iteration.</param>
    /// <returns>An evaluation whose <see cref="ShouldReinvoke"/> is <see langword="true"/>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="messages"/> is <see langword="null"/>.</exception>
    /// <remarks>
    /// Choose this when the next input must be fully controlled (for example non-user roles, several messages, or
    /// non-text content). The messages are sent verbatim, and the loop neither accumulates nor injects feedback
    /// for this iteration.
    /// </remarks>
    public static LoopEvaluation ContinueWithMessages(IEnumerable<ChatMessage> messages)
    {
        _ = Throw.IfNull(messages);

        // A sequence that is already a read-only list is stored as-is; anything else is copied into a list.
        IReadOnlyList<ChatMessage> materialized;
        if (messages is IReadOnlyList<ChatMessage> existingList)
        {
            materialized = existingList;
        }
        else
        {
            materialized = messages.ToList();
        }

        return new LoopEvaluation(shouldReinvoke: true, feedback: null, messages: materialized);
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Shared.DiagnosticIds;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Abstract base class for components that decide, after every agent iteration, whether a
/// <see cref="LoopAgent"/> should invoke the wrapped agent again and what feedback to give it.
/// </summary>
/// <remarks>
/// <para>
/// A <see cref="LoopEvaluator"/> only makes a judgment: it examines the <see cref="LoopContext"/> and returns a
/// <see cref="LoopEvaluation"/> saying whether to continue, together with any feedback for the next iteration.
/// Managing the session and constructing the next input messages is the job of the <see cref="LoopAgent"/> that
/// consumes the evaluator, not of the evaluator.
/// </para>
/// <para>
/// Built-in implementations include <see cref="AIJudgeLoopEvaluator"/>, <see cref="DelegateLoopEvaluator"/>, and
/// <see cref="CompletionMarkerLoopEvaluator"/>. Implementations should be stateless and safe to share across
/// concurrent loop runs; per-run state must be kept on the supplied <see cref="LoopContext"/>.
/// </para>
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)]
public abstract class LoopEvaluator
{
    /// <summary>
    /// Examines the loop state after an iteration and decides whether the wrapped agent should be re-invoked and
    /// with what feedback.
    /// </summary>
    /// <param name="context">The per-run <see cref="LoopContext"/> describing the current state of the loop.</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests.</param>
    /// <returns>
    /// A value task producing a <see cref="LoopEvaluation"/> that says whether to continue and, if so, which
    /// feedback to carry into the next iteration.
    /// </returns>
    public abstract ValueTask<LoopEvaluation> EvaluateAsync(LoopContext context, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Microsoft.Agents.AI;
|
||||
|
||||
/// <summary>
/// Source-generated <see cref="JsonSerializerContext"/> covering the loop types that need JSON serialization,
/// such as the structured <see cref="JudgeVerdict"/> used by <see cref="AIJudgeLoopEvaluator"/>.
/// </summary>
[ExcludeFromCodeCoverage]
[JsonSourceGenerationOptions(JsonSerializerDefaults.Web)]
[JsonSerializable(typeof(JudgeVerdict))]
internal sealed partial class LoopJsonContext : JsonSerializerContext
{
}
|
||||
@@ -0,0 +1,314 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Moq;
|
||||
|
||||
using static Microsoft.Agents.AI.UnitTests.LoopTestHelpers;
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
|
||||
/// Unit tests for the <see cref="AIJudgeLoopEvaluator"/> class.
|
||||
/// </summary>
|
||||
public class AIJudgeLoopEvaluatorTests
|
||||
{
|
||||
/// <summary>
/// Confirms that a judge reply of <c>{"answered":true}</c> makes the evaluator stop the loop.
/// </summary>
[Fact]
public async Task EvaluateAsync_Answered_StopsAsync()
{
    // Arrange
    var sut = new AIJudgeLoopEvaluator(CreateJudgeClient("{\"answered\":true}"));
    LoopContext loopContext = CreateContext();

    // Act
    LoopEvaluation result = await sut.EvaluateAsync(loopContext);

    // Assert
    Assert.False(result.ShouldReinvoke);
}
|
||||
|
||||
/// <summary>
/// Confirms that a "not answered" verdict makes the evaluator continue, with feedback that includes the
/// judge's gap analysis and no leftover gap-analysis placeholder.
/// </summary>
[Fact]
public async Task EvaluateAsync_NotAnswered_ContinuesWithGapAnalysisAsync()
{
    // Arrange
    var sut = new AIJudgeLoopEvaluator(
        CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"the cost estimate is missing\"}"));
    LoopContext loopContext = CreateContext();

    // Act
    LoopEvaluation result = await sut.EvaluateAsync(loopContext);

    // Assert
    Assert.True(result.ShouldReinvoke);
    Assert.NotNull(result.Feedback);
    Assert.Contains("the cost estimate is missing", result.Feedback!);
    Assert.DoesNotContain(AIJudgeLoopEvaluator.GapAnalysisPlaceholder, result.Feedback!);
}
|
||||
|
||||
/// <summary>
/// Confirms that a judge reply consisting of the DONE verdict marker makes the evaluator stop the loop.
/// </summary>
[Fact]
public async Task EvaluateAsync_TextFallback_StopsWhenAnsweredAsync()
{
    // Arrange
    var sut = new AIJudgeLoopEvaluator(CreateJudgeClient(AIJudgeLoopEvaluator.DoneVerdictMarker));
    LoopContext loopContext = CreateContext();

    // Act
    LoopEvaluation result = await sut.EvaluateAsync(loopContext);

    // Assert
    Assert.False(result.ShouldReinvoke);
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that the gap-analysis placeholder is filled with a fallback token when no structured output is produced.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_NotAnswered_TextFallback_InjectsUnknownGapAnalysisAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateJudgeClient(AIJudgeLoopEvaluator.MoreVerdictMarker);
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
Assert.True(evaluation.ShouldReinvoke);
|
||||
Assert.Contains("<unknown>", evaluation.Feedback!);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that the text fallback keeps looping for replies that merely contain the substring "ANSWERED" (for
|
||||
/// example "UNANSWERED" or "NOT ANSWERED") rather than the explicit DONE verdict marker.
|
||||
/// </summary>
|
||||
[Theory]
|
||||
[InlineData("UNANSWERED")]
|
||||
[InlineData("NOT ANSWERED")]
|
||||
[InlineData("The request is not yet answered.")]
|
||||
public async Task EvaluateAsync_TextFallback_AmbiguousReply_ContinuesAsync(string reply)
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateJudgeClient(reply);
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
Assert.True(evaluation.ShouldReinvoke);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that custom judge instructions from options are sent to the judge client.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_CustomInstructions_AreSentToJudgeAsync()
|
||||
{
|
||||
// Arrange
|
||||
List<ChatMessage>? judgeMessages = null;
|
||||
var judgeMock = new Mock<IChatClient>();
|
||||
judgeMock.Setup(c => c.GetResponseAsync(
|
||||
It.IsAny<IEnumerable<ChatMessage>>(),
|
||||
It.IsAny<ChatOptions>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((msgs, _, _) => judgeMessages = msgs.ToList())
|
||||
.ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}")));
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeMock.Object, new AIJudgeLoopEvaluatorOptions { Instructions = "CUSTOM JUDGE PROMPT" });
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
Assert.NotNull(judgeMessages);
|
||||
Assert.Contains(judgeMessages!, m => m.Role == ChatRole.System && m.Text == "CUSTOM JUDGE PROMPT");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that a custom feedback message template from options is honored.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_CustomFeedbackMessageTemplate_IsHonoredAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateJudgeClient("{\"answered\":false,\"gapAnalysis\":\"add unit tests\"}");
|
||||
const string Template = "Please address: " + AIJudgeLoopEvaluator.GapAnalysisPlaceholder;
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient, new AIJudgeLoopEvaluatorOptions { FeedbackMessageTemplate = Template });
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
LoopEvaluation evaluation = await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
Assert.Equal("Please address: add unit tests", evaluation.Feedback);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that non-text content in the original request (for example an image) is forwarded to the judge
|
||||
/// rather than being silently dropped when flattening the request to text.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_NonTextRequestContent_IsForwardedToJudgeAsync()
|
||||
{
|
||||
// Arrange
|
||||
List<ChatMessage>? judgeMessages = null;
|
||||
var judgeMock = new Mock<IChatClient>();
|
||||
judgeMock.Setup(c => c.GetResponseAsync(
|
||||
It.IsAny<IEnumerable<ChatMessage>>(),
|
||||
It.IsAny<ChatOptions>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((msgs, _, _) => judgeMessages = msgs.ToList())
|
||||
.ReturnsAsync(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{\"answered\":true}")));
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeMock.Object);
|
||||
var imageContent = new DataContent(new byte[] { 1, 2, 3, 4 }, "image/png");
|
||||
var context = new LoopContext(
|
||||
new Mock<AIAgent>().Object,
|
||||
new ChatClientAgentSession(),
|
||||
[new ChatMessage(ChatRole.User, [imageContent])],
|
||||
new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")]));
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
Assert.NotNull(judgeMessages);
|
||||
ChatMessage userMessage = Assert.Single(judgeMessages!, m => m.Role == ChatRole.User);
|
||||
Assert.Contains(userMessage.Contents.OfType<DataContent>(), c => c.MediaType == "image/png");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that the constructor throws when the judge client is null.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void AIJudgeLoopEvaluator_NullClient_Throws()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.Throws<ArgumentNullException>("judgeClient", () => new AIJudgeLoopEvaluator(null!));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that EvaluateAsync throws when the context is null.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_NullContext_ThrowsAsync()
|
||||
{
|
||||
// Arrange
|
||||
var evaluator = new AIJudgeLoopEvaluator(CreateJudgeClient("{\"answered\":true}"));
|
||||
|
||||
// Act & Assert
|
||||
await Assert.ThrowsAsync<ArgumentNullException>("context", async () => await evaluator.EvaluateAsync(null!));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that supplied criteria are rendered into the default judge instructions as a bullet list and the
|
||||
/// placeholder is consumed.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_Criteria_AreRenderedIntoDefaultInstructionsAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
|
||||
var options = new AIJudgeLoopEvaluatorOptions { Criteria = ["Must cite sources", "Must be under 200 words"] };
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient, options);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text;
|
||||
Assert.Contains("The response must satisfy all of the following criteria:", system);
|
||||
Assert.Contains("- Must cite sources", system);
|
||||
Assert.Contains("- Must be under 200 words", system);
|
||||
Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that when no criteria are supplied the placeholder is removed and no criteria block is added to the
|
||||
/// default instructions.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_NoCriteria_LeavesDefaultInstructionsWithoutCriteriaBlockAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text;
|
||||
Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
|
||||
Assert.DoesNotContain("The response must satisfy all of the following criteria:", system);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that criteria are injected at the placeholder location in custom instructions.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_CustomInstructionsWithPlaceholder_InjectsCriteriaAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
|
||||
const string Instructions = "Judge the answer." + AIJudgeLoopEvaluator.CriteriaPlaceholder + " Be strict.";
|
||||
var options = new AIJudgeLoopEvaluatorOptions { Instructions = Instructions, Criteria = ["Must include code"] };
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient, options);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text;
|
||||
Assert.StartsWith("Judge the answer.", system);
|
||||
Assert.EndsWith("Be strict.", system);
|
||||
Assert.Contains("- Must include code", system);
|
||||
Assert.DoesNotContain(AIJudgeLoopEvaluator.CriteriaPlaceholder, system);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verify that custom instructions without the placeholder do not receive the criteria.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EvaluateAsync_CustomInstructionsWithoutPlaceholder_OmitsCriteriaAsync()
|
||||
{
|
||||
// Arrange
|
||||
var judgeClient = CreateCapturingJudgeClient("{\"answered\":true}", out List<ChatMessage> judgeMessages);
|
||||
const string Instructions = "Judge the answer and be strict.";
|
||||
var options = new AIJudgeLoopEvaluatorOptions { Instructions = Instructions, Criteria = ["Must include code"] };
|
||||
var evaluator = new AIJudgeLoopEvaluator(judgeClient, options);
|
||||
LoopContext context = CreateContext();
|
||||
|
||||
// Act
|
||||
await evaluator.EvaluateAsync(context);
|
||||
|
||||
// Assert
|
||||
string system = judgeMessages.Single(static m => m.Role == ChatRole.System).Text;
|
||||
Assert.Equal(Instructions, system);
|
||||
}
|
||||
|
||||
private static LoopContext CreateContext() => new(
|
||||
new Mock<AIAgent>().Object,
|
||||
new ChatClientAgentSession(),
|
||||
[new ChatMessage(ChatRole.User, "original question")],
|
||||
new AgentResponse([new ChatMessage(ChatRole.Assistant, "partial answer")]));
|
||||
}
|
||||
+145
@@ -0,0 +1,145 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Moq;
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests covering <see cref="CompletionMarkerLoopEvaluator"/>: constructor validation, marker detection, and
/// feedback template substitution.
/// </summary>
public class CompletionMarkerLoopEvaluatorTests
{
    /// <summary>
    /// A null, empty, or blank marker must be rejected by the constructor with an
    /// <see cref="ArgumentException"/> (or a derived type).
    /// </summary>
    /// <param name="marker">The invalid marker passed to the constructor.</param>
    [Theory]
    [InlineData(null)]
    [InlineData("")]
    [InlineData(" ")]
    public void CompletionMarkerLoopEvaluator_InvalidMarker_Throws(string? marker)
    {
        // Act & Assert
        Assert.ThrowsAny<ArgumentException>(
            () => new CompletionMarkerLoopEvaluator(marker!));
    }

    /// <summary>
    /// When the latest response text contains the marker, the evaluation must not re-invoke the agent.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_MarkerPresent_StopsAsync()
    {
        // Arrange
        CompletionMarkerLoopEvaluator sut = new("DONE");
        LoopContext loopContext = CreateContext("all DONE here");

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.False(result.ShouldReinvoke);
    }

    /// <summary>
    /// When the marker is missing from the response, the loop must continue with non-null feedback that names the
    /// marker and no longer contains the completion-marker placeholder.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_MarkerAbsent_ContinuesWithDefaultFeedbackAsync()
    {
        // Arrange
        const string Marker = "DONE";
        CompletionMarkerLoopEvaluator sut = new(Marker);
        LoopContext loopContext = CreateContext("still working");

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.NotNull(result.Feedback);
        Assert.Contains(Marker, result.Feedback!);
        Assert.DoesNotContain(CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder, result.Feedback!);
    }

    /// <summary>
    /// A custom feedback template must be used as-is, with the completion-marker placeholder replaced by the
    /// configured marker.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_IsHonoredAsync()
    {
        // Arrange
        const string FeedbackTemplate = "Keep going and finish with " + CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when done.";
        CompletionMarkerLoopEvaluatorOptions options = new() { FeedbackMessageTemplate = FeedbackTemplate };
        CompletionMarkerLoopEvaluator sut = new("FINISHED", options);
        LoopContext loopContext = CreateContext("still working");

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.Equal("Keep going and finish with FINISHED when done.", result.Feedback);
    }

    /// <summary>
    /// A custom template that includes the last-response placeholder must have it replaced by the agent's latest
    /// response text, leaving no placeholder behind.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_MarkerAbsent_CustomTemplate_SubstitutesLastResponseAsync()
    {
        // Arrange
        const string FeedbackTemplate =
            "Your previous attempt was: '" + CompletionMarkerLoopEvaluator.LastResponsePlaceholder +
            "'. Improve it and finish with " + CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder + " when done.";
        CompletionMarkerLoopEvaluatorOptions options = new() { FeedbackMessageTemplate = FeedbackTemplate };
        CompletionMarkerLoopEvaluator sut = new("FINISHED", options);
        LoopContext loopContext = CreateContext("candidate name: NoteNest");

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.Equal("Your previous attempt was: 'candidate name: NoteNest'. Improve it and finish with FINISHED when done.", result.Feedback);
        Assert.DoesNotContain(CompletionMarkerLoopEvaluator.LastResponsePlaceholder, result.Feedback!);
    }

    /// <summary>
    /// With the default template, the feedback must equal the template with only the marker substituted, and must
    /// not echo the agent's latest response text.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_MarkerAbsent_DefaultTemplate_DoesNotIncludeLastResponseAsync()
    {
        // Arrange
        CompletionMarkerLoopEvaluator sut = new("DONE");
        LoopContext loopContext = CreateContext("candidate name: NoteNest");

        // Act
        var result = await sut.EvaluateAsync(loopContext);

        // Assert
        string expectedFeedback = CompletionMarkerLoopEvaluator.DefaultFeedbackMessageTemplate
            .Replace(CompletionMarkerLoopEvaluator.CompletionMarkerPlaceholder, "DONE");
        Assert.True(result.ShouldReinvoke);
        Assert.Equal(expectedFeedback, result.Feedback);
        Assert.DoesNotContain("NoteNest", result.Feedback!);
    }

    /// <summary>
    /// Calling EvaluateAsync with a null context must throw an <see cref="ArgumentNullException"/> for the
    /// <c>context</c> parameter.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NullContext_ThrowsAsync()
    {
        // Arrange
        CompletionMarkerLoopEvaluator sut = new("DONE");

        // Act & Assert
        await Assert.ThrowsAsync<ArgumentNullException>(
            "context",
            async () => await sut.EvaluateAsync(null!));
    }

    /// <summary>
    /// Builds a context with a mocked agent, a new session, a single "go" user message, and an assistant response
    /// containing <paramref name="responseText"/>.
    /// </summary>
    private static LoopContext CreateContext(string responseText)
    {
        return new(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, "go")],
            new AgentResponse([new ChatMessage(ChatRole.Assistant, responseText)]));
    }
}
|
||||
+113
@@ -0,0 +1,113 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Moq;
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests covering <see cref="DelegateLoopEvaluator"/>: argument validation and pass-through of the context,
/// cancellation token, and result to and from the wrapped delegate.
/// </summary>
public class DelegateLoopEvaluatorTests
{
    /// <summary>
    /// A null delegate must be rejected by the constructor with an <see cref="ArgumentNullException"/> for the
    /// <c>evaluate</c> parameter.
    /// </summary>
    [Fact]
    public void DelegateLoopEvaluator_NullDelegate_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "evaluate",
            () => new DelegateLoopEvaluator(null!));
    }

    /// <summary>
    /// Calling EvaluateAsync with a null context must throw an <see cref="ArgumentNullException"/> for the
    /// <c>context</c> parameter.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_NullContext_ThrowsAsync()
    {
        // Arrange
        DelegateLoopEvaluator sut = new((_, _) => new ValueTask<LoopEvaluation>(LoopEvaluation.Stop()));

        // Act & Assert
        await Assert.ThrowsAsync<ArgumentNullException>(
            "context",
            async () => await sut.EvaluateAsync(null!));
    }

    /// <summary>
    /// EvaluateAsync must call the wrapped delegate and hand back the very evaluation instance it returned.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_InvokesDelegate_AndReturnsItsEvaluationAsync()
    {
        // Arrange
        bool delegateRan = false;
        LoopEvaluation produced = LoopEvaluation.Continue("feedback");
        DelegateLoopEvaluator sut = new((_, _) =>
        {
            delegateRan = true;
            return new ValueTask<LoopEvaluation>(produced);
        });
        LoopContext loopContext = CreateContext();

        // Act
        var returned = await sut.EvaluateAsync(loopContext);

        // Assert
        Assert.True(delegateRan);
        Assert.Same(produced, returned);
    }

    /// <summary>
    /// The delegate must receive the same context instance that was passed to EvaluateAsync.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_PassesContextToDelegateAsync()
    {
        // Arrange
        LoopContext? seenContext = null;
        DelegateLoopEvaluator sut = new((loopContext, _) =>
        {
            seenContext = loopContext;
            return new ValueTask<LoopEvaluation>(LoopEvaluation.Stop());
        });
        LoopContext suppliedContext = CreateContext();

        // Act
        await sut.EvaluateAsync(suppliedContext);

        // Assert
        Assert.Same(suppliedContext, seenContext);
    }

    /// <summary>
    /// The delegate must receive the cancellation token that was passed to EvaluateAsync.
    /// </summary>
    [Fact]
    public async Task EvaluateAsync_ForwardsCancellationTokenToDelegateAsync()
    {
        // Arrange
        using CancellationTokenSource source = new();
        var seenToken = CancellationToken.None;
        DelegateLoopEvaluator sut = new((_, token) =>
        {
            seenToken = token;
            return new ValueTask<LoopEvaluation>(LoopEvaluation.Stop());
        });
        LoopContext loopContext = CreateContext();

        // Act
        await sut.EvaluateAsync(loopContext, source.Token);

        // Assert
        Assert.Equal(source.Token, seenToken);
    }

    /// <summary>
    /// Builds a context with a mocked agent, a new session, a single "go" user message, and a "response"
    /// assistant reply.
    /// </summary>
    private static LoopContext CreateContext()
    {
        return new(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, "go")],
            new AgentResponse([new ChatMessage(ChatRole.Assistant, "response")]));
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,146 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Moq;
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests covering <see cref="LoopContext"/>: constructor argument validation, initial property values, the
/// settable <c>Session</c> and <c>Feedback</c> members, and use of a directly constructed context with an evaluator.
/// </summary>
public class LoopContextTests
{
    /// <summary>
    /// A null agent must be rejected with an <see cref="ArgumentNullException"/> for the <c>agent</c> parameter.
    /// </summary>
    [Fact]
    public void Constructor_NullAgent_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "agent",
            () => new LoopContext(
                null!,
                new ChatClientAgentSession(),
                [],
                CreateResponse()));
    }

    /// <summary>
    /// A null session must be rejected with an <see cref="ArgumentNullException"/> for the <c>session</c>
    /// parameter.
    /// </summary>
    [Fact]
    public void Constructor_NullSession_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "session",
            () => new LoopContext(
                new Mock<AIAgent>().Object,
                null!,
                [],
                CreateResponse()));
    }

    /// <summary>
    /// Null initial messages must be rejected with an <see cref="ArgumentNullException"/> for the
    /// <c>initialMessages</c> parameter.
    /// </summary>
    [Fact]
    public void Constructor_NullInitialMessages_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "initialMessages",
            () => new LoopContext(
                new Mock<AIAgent>().Object,
                new ChatClientAgentSession(),
                null!,
                CreateResponse()));
    }

    /// <summary>
    /// A null last response must be rejected with an <see cref="ArgumentNullException"/> for the
    /// <c>lastResponse</c> parameter.
    /// </summary>
    [Fact]
    public void Constructor_NullLastResponse_Throws()
    {
        // Act & Assert
        Assert.Throws<ArgumentNullException>(
            "lastResponse",
            () => new LoopContext(
                new Mock<AIAgent>().Object,
                new ChatClientAgentSession(),
                [],
                null!));
    }

    /// <summary>
    /// With valid arguments the constructor must expose the supplied instances unchanged and start with null run
    /// options, non-null additional properties, iteration zero, and empty feedback.
    /// </summary>
    [Fact]
    public void Constructor_ValidArguments_SetsProperties()
    {
        // Arrange
        AIAgent mockedAgent = new Mock<AIAgent>().Object;
        ChatClientAgentSession suppliedSession = new();
        ChatMessage[] suppliedMessages = [new ChatMessage(ChatRole.User, "go")];
        AgentResponse suppliedResponse = CreateResponse("done");

        // Act
        LoopContext sut = new(mockedAgent, suppliedSession, suppliedMessages, suppliedResponse);

        // Assert
        Assert.Same(mockedAgent, sut.Agent);
        Assert.Same(suppliedSession, sut.Session);
        Assert.Same(suppliedMessages, sut.InitialMessages);
        Assert.Same(suppliedResponse, sut.LastResponse);
        Assert.Null(sut.RunOptions);
        Assert.NotNull(sut.AdditionalProperties);
        Assert.Equal(0, sut.Iteration);
        Assert.Empty(sut.Feedback);
    }

    /// <summary>
    /// Assigning <c>Session</c> must replace the session returned by the property.
    /// </summary>
    [Fact]
    public void Session_IsInternallySettable()
    {
        // Arrange
        LoopContext sut = new(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [],
            CreateResponse());
        ChatClientAgentSession replacement = new();

        // Act
        sut.Session = replacement;

        // Assert
        Assert.Same(replacement, sut.Session);
    }

    /// <summary>
    /// Assigning <see cref="LoopContext.Feedback"/> must make the property return the assigned entries, including
    /// a null entry.
    /// </summary>
    [Fact]
    public void Feedback_IsInternallySettable()
    {
        // Arrange
        LoopContext sut = new(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [],
            CreateResponse());

        // Act
        sut.Feedback = ["first", null];

        // Assert
        Assert.Equal(["first", null], sut.Feedback);
    }

    /// <summary>
    /// A context created directly through the public constructor must be usable as the input of an evaluator; here
    /// the delegate stops because the last response text is "done".
    /// </summary>
    [Fact]
    public async Task PubliclyConstructedContext_CanEvaluateEvaluatorAsync()
    {
        // Arrange
        LoopContext loopContext = new(
            new Mock<AIAgent>().Object,
            new ChatClientAgentSession(),
            [new ChatMessage(ChatRole.User, "go")],
            CreateResponse("done"));
        DelegateLoopEvaluator evaluator = new((ctx, _) =>
        {
            bool finished = ctx.LastResponse.Text == "done";
            return new ValueTask<LoopEvaluation>(finished ? LoopEvaluation.Stop() : LoopEvaluation.Continue());
        });

        // Act
        var result = await evaluator.EvaluateAsync(loopContext);

        // Assert
        Assert.False(result.ShouldReinvoke);
    }

    /// <summary>
    /// Builds an <see cref="AgentResponse"/> holding one assistant message with the given text.
    /// </summary>
    private static AgentResponse CreateResponse(string text = "response")
    {
        return new([new ChatMessage(ChatRole.Assistant, text)]);
    }
}
|
||||
@@ -0,0 +1,55 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests covering the <see cref="LoopEvaluation"/> factory methods <c>Stop</c> and <c>Continue</c>.
/// </summary>
public class LoopEvaluationTests
{
    /// <summary>
    /// <c>Stop</c> must produce an evaluation with <c>ShouldReinvoke</c> false and null feedback.
    /// </summary>
    [Fact]
    public void Stop_DoesNotReinvoke_AndHasNoFeedback()
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Stop();

        // Assert
        Assert.False(result.ShouldReinvoke);
        Assert.Null(result.Feedback);
    }

    /// <summary>
    /// <c>Continue</c> called without an argument must produce an evaluation with <c>ShouldReinvoke</c> true and
    /// null feedback.
    /// </summary>
    [Fact]
    public void Continue_NoFeedback_ReinvokesWithNullFeedback()
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Continue();

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.Null(result.Feedback);
    }

    /// <summary>
    /// <c>Continue</c> called with empty or whitespace-only feedback must still re-invoke but report the feedback
    /// as null.
    /// </summary>
    /// <param name="feedback">The empty or whitespace-only feedback value.</param>
    [Theory]
    [InlineData("")]
    [InlineData(" ")]
    [InlineData("\t\n")]
    public void Continue_WhitespaceFeedback_NormalizesToNull(string feedback)
    {
        // Act
        LoopEvaluation result = LoopEvaluation.Continue(feedback);

        // Assert
        Assert.True(result.ShouldReinvoke);
        Assert.Null(result.Feedback);
    }
}
|
||||
@@ -0,0 +1,141 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Moq;
|
||||
using Moq.Protected;
|
||||
|
||||
namespace Microsoft.Agents.AI.UnitTests;
|
||||
|
||||
/// <summary>
/// Factory and conversion helpers shared across the LoopAgent and LoopEvaluator unit tests.
/// </summary>
internal static class LoopTestHelpers
{
    /// <summary>
    /// Builds a <see cref="DelegateLoopEvaluator"/> that answers <c>Continue()</c> (no feedback) whenever
    /// <paramref name="shouldReinvoke"/> returns <see langword="true"/> and <c>Stop()</c> otherwise.
    /// </summary>
    public static DelegateLoopEvaluator While(Func<LoopContext, bool> shouldReinvoke)
    {
        return new DelegateLoopEvaluator((loopContext, _) =>
        {
            LoopEvaluation verdict = shouldReinvoke(loopContext)
                ? LoopEvaluation.Continue()
                : LoopEvaluation.Stop();
            return new ValueTask<LoopEvaluation>(verdict);
        });
    }

    /// <summary>
    /// Builds a mocked judge <see cref="IChatClient"/> whose <c>GetResponseAsync</c> returns an assistant message
    /// with <paramref name="responseText"/> for any arguments.
    /// </summary>
    public static IChatClient CreateJudgeClient(string responseText)
    {
        ChatResponse cannedReply = new(new ChatMessage(ChatRole.Assistant, responseText));

        Mock<IChatClient> judge = new();
        judge
            .Setup(client => client.GetResponseAsync(
                It.IsAny<IEnumerable<ChatMessage>>(),
                It.IsAny<ChatOptions>(),
                It.IsAny<CancellationToken>()))
            .ReturnsAsync(cannedReply);

        return judge.Object;
    }

    /// <summary>
    /// Builds a mocked judge <see cref="IChatClient"/> like <see cref="CreateJudgeClient"/> that additionally
    /// records the messages of its most recent call in <paramref name="capturedMessages"/> (the list is cleared
    /// and refilled on every call).
    /// </summary>
    public static IChatClient CreateCapturingJudgeClient(string responseText, out List<ChatMessage> capturedMessages)
    {
        // An out parameter cannot be captured by a lambda, so the callback writes to a local list that is also
        // handed to the caller.
        List<ChatMessage> sink = [];
        capturedMessages = sink;

        ChatResponse cannedReply = new(new ChatMessage(ChatRole.Assistant, responseText));

        Mock<IChatClient> judge = new();
        judge
            .Setup(client => client.GetResponseAsync(
                It.IsAny<IEnumerable<ChatMessage>>(),
                It.IsAny<ChatOptions>(),
                It.IsAny<CancellationToken>()))
            .Callback<IEnumerable<ChatMessage>, ChatOptions?, CancellationToken>((incoming, _, _) =>
            {
                sink.Clear();
                sink.AddRange(incoming);
            })
            .ReturnsAsync(cannedReply);

        return judge.Object;
    }

    /// <summary>
    /// Exposes <paramref name="items"/> as an asynchronous sequence. Cancellation is checked before each element
    /// is yielded, and the iterator awaits <see cref="Task.Yield"/> after each element.
    /// </summary>
    public static async IAsyncEnumerable<T> ToAsyncEnumerableAsync<T>(
        IEnumerable<T> items,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        foreach (T element in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            yield return element;
            await Task.Yield();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Wraps a mocked <see cref="AIAgent"/> whose protected <c>RunCoreAsync</c> records the messages and session of
/// every call and answers with a response chosen by the 1-based call index.
/// </summary>
internal sealed class InnerAgentCapture
{
    /// <summary>
    /// Configures the mock so that each <c>RunCoreAsync</c> call is recorded and answered with
    /// <paramref name="responseFactory"/> applied to the call count after it has been incremented.
    /// </summary>
    public InnerAgentCapture(Func<int, AgentResponse> responseFactory)
    {
        var runCore = this.Mock
            .Protected()
            .Setup<Task<AgentResponse>>(
                "RunCoreAsync",
                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
                ItExpr.IsAny<AgentSession?>(),
                ItExpr.IsAny<AgentRunOptions?>(),
                ItExpr.IsAny<CancellationToken>());

        // The callback runs first and bumps CallCount; the response factory then sees the updated count.
        runCore
            .Callback<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>(
                (messages, session, _, _) => this.Record(messages, session))
            .ReturnsAsync(() => responseFactory(this.CallCount));
    }

    /// <summary>Gets the underlying mock.</summary>
    public Mock<AIAgent> Mock { get; } = new();

    /// <summary>Gets the mocked agent instance.</summary>
    public AIAgent Agent => this.Mock.Object;

    /// <summary>Gets the number of <c>RunCoreAsync</c> calls recorded so far.</summary>
    public int CallCount { get; private set; }

    /// <summary>Gets a snapshot of the messages passed to each call, in call order.</summary>
    public List<List<ChatMessage>> MessagesPerCall { get; } = [];

    /// <summary>Gets the session passed to each call, in call order.</summary>
    public List<AgentSession?> SessionsPerCall { get; } = [];

    /// <summary>Counts one call and stores a copy of its messages together with its session.</summary>
    private void Record(IEnumerable<ChatMessage> messages, AgentSession? session)
    {
        this.CallCount += 1;
        this.MessagesPerCall.Add(messages.ToList());
        this.SessionsPerCall.Add(session);
    }
}
|
||||
|
||||
/// <summary>
/// Wraps a mocked <see cref="AIAgent"/> whose protected <c>RunCoreStreamingAsync</c> records the messages and
/// session of every call and streams the updates chosen by the 1-based call index.
/// </summary>
internal sealed class InnerStreamingCapture
{
    /// <summary>
    /// Configures the mock so that each <c>RunCoreStreamingAsync</c> call is recorded and answered with the
    /// updates returned by <paramref name="updatesFactory"/> for the call count after it has been incremented.
    /// </summary>
    /// <param name="updatesFactory">Maps the 1-based call index to the updates streamed for that call.</param>
    public InnerStreamingCapture(Func<int, AgentResponseUpdate[]> updatesFactory)
    {
        this.Mock
            .Protected()
            .Setup<IAsyncEnumerable<AgentResponseUpdate>>("RunCoreStreamingAsync",
                ItExpr.IsAny<IEnumerable<ChatMessage>>(),
                ItExpr.IsAny<AgentSession?>(),
                ItExpr.IsAny<AgentRunOptions?>(),
                ItExpr.IsAny<CancellationToken>())
            .Returns<IEnumerable<ChatMessage>, AgentSession?, AgentRunOptions?, CancellationToken>((msgs, session, _, ct) =>
            {
                this.CallCount++;
                this.MessagesPerCall.Add(msgs.ToList());

                // Record the session as InnerAgentCapture does, so streaming tests can inspect it per call.
                this.SessionsPerCall.Add(session);

                return LoopTestHelpers.ToAsyncEnumerableAsync(updatesFactory(this.CallCount), ct);
            });
    }

    /// <summary>Gets the underlying mock.</summary>
    public Mock<AIAgent> Mock { get; } = new();

    /// <summary>Gets the mocked agent instance.</summary>
    public AIAgent Agent => this.Mock.Object;

    /// <summary>Gets the number of <c>RunCoreStreamingAsync</c> calls recorded so far.</summary>
    public int CallCount { get; private set; }

    /// <summary>Gets a snapshot of the messages passed to each call, in call order.</summary>
    public List<List<ChatMessage>> MessagesPerCall { get; } = [];

    /// <summary>Gets the session passed to each call, in call order.</summary>
    public List<AgentSession?> SessionsPerCall { get; } = [];
}
|
||||
Reference in New Issue
Block a user