// Copyright (c) Microsoft. All rights reserved.
using System.ComponentModel;
using System.Text.Json.Serialization;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using OpenAI.Chat;
namespace VerifySamples;
/// <summary>
/// Verifies sample output using deterministic checks and an AI agent
/// for non-deterministic output validation.
/// </summary>
internal sealed class SampleVerifier
{
    private readonly AIAgent? _verifierAgent;

    /// <summary>
    /// Creates a verifier. If <paramref name="chatClient"/> is provided,
    /// AI-based verification is available for non-deterministic samples.
    /// </summary>
    /// <param name="chatClient">
    /// Chat client used to build the verifier agent. When <see langword="null"/>, no agent is
    /// created and <see cref="VerifyAsync"/> reports a failure for samples that need AI verification.
    /// </param>
    public SampleVerifier(ChatClient? chatClient = null)
    {
        if (chatClient is not null)
        {
            this._verifierAgent = chatClient.AsAIAgent(
                instructions: """
                    You are a test output verifier. You will be given:
                    1. The actual stdout output of a program
                    2. The stderr output (if any)
                    3. A list of expectations about what the output should contain or demonstrate
                    Your job is to determine whether the actual output satisfies each expectation.
                    Be reasonable — the output comes from an LLM so exact wording won't match, but the
                    semantic intent should be clearly satisfied.
                    In your response, you MUST:
                    - Always provide ai_reasoning with a brief overall assessment.
                    - Always provide exactly one entry in expectation_results for each expectation,
                    in the same order as the input list.
                    - For each expectation_results entry, echo the expectation text in the expectation
                    field and explain your assessment in the detail field, citing evidence from the output.
                    """,
                name: "OutputVerifier");
        }
    }

    /// <summary>
    /// Verifies the output of a sample run against its definition.
    /// </summary>
    /// <param name="sample">Definition holding the must-contain / must-not-contain substrings and the AI expectations.</param>
    /// <param name="run">Captured exit code, stdout and stderr of the sample run.</param>
    /// <returns>
    /// A result whose <c>Passed</c> flag is true only when no check added a failure; every failed
    /// check contributes one entry to <c>Failures</c>.
    /// </returns>
    public async Task<VerificationResult> VerifyAsync(SampleDefinition sample, SampleRunResult run)
    {
        var failures = new List<string>();

        // 1. Exit code check
        if (run.ExitCode != 0)
        {
            failures.Add($"Exit code was {run.ExitCode}, expected 0. Stderr: {Truncate(run.Stderr, 500)}");
        }

        // 2. Must-contain checks (case-sensitive, stdout only)
        foreach (var expected in sample.MustContain)
        {
            if (!run.Stdout.Contains(expected, StringComparison.Ordinal))
            {
                failures.Add($"Output missing expected substring: \"{expected}\"");
            }
        }

        // 3. Must-not-contain checks (case-sensitive, stdout only)
        foreach (var unexpected in sample.MustNotContain)
        {
            if (run.Stdout.Contains(unexpected, StringComparison.Ordinal))
            {
                failures.Add($"Output contains unexpected substring: \"{unexpected}\"");
            }
        }

        // 4. AI verification for non-deterministic samples
        string? aiReasoning = null;
        if (!sample.IsDeterministic && sample.ExpectedOutputDescription.Length > 0)
        {
            if (this._verifierAgent is not { } agent)
            {
                failures.Add("AI verification required but no AI agent configured (missing AZURE_OPENAI_ENDPOINT).");
            }
            else
            {
                var aiResult = await VerifyWithAIAsync(agent, run.Stdout, run.Stderr, sample.ExpectedOutputDescription);
                aiReasoning = aiResult.Reasoning;
                foreach (var unmet in aiResult.UnmetExpectations)
                {
                    failures.Add($"AI expectation not met: {unmet}");
                }
            }
        }

        bool passed = failures.Count == 0;
        return new VerificationResult
        {
            SampleName = sample.Name,
            Passed = passed,
            Summary = passed ? "All checks passed" : $"{failures.Count} check(s) failed",
            Failures = failures,
            AIReasoning = aiReasoning,
        };
    }

    /// <summary>
    /// Asks the verifier agent whether the captured output satisfies the expectations.
    /// </summary>
    /// <param name="agent">The verifier agent to query.</param>
    /// <param name="stdout">Captured standard output; only the first 4000 characters are sent.</param>
    /// <param name="stderr">Captured standard error; omitted from the prompt when blank, otherwise the first 2000 characters are sent.</param>
    /// <param name="expectations">Expectations to evaluate, sent as a numbered list.</param>
    /// <returns>
    /// The agent's overall reasoning and one message per unmet expectation. Any exception thrown
    /// while calling the agent is caught and returned as a single unmet entry instead of propagating.
    /// </returns>
    private static async Task<(string Reasoning, List<string> UnmetExpectations)> VerifyWithAIAsync(
        AIAgent agent,
        string stdout,
        string stderr,
        string[] expectations)
    {
        var expectationList = string.Join("\n", expectations.Select((e, i) => $" {i + 1}. {e}"));
        var stderrSection = string.IsNullOrWhiteSpace(stderr)
            ? ""
            : $"""
                Stderr output:
                ---
                {Truncate(stderr, 2000)}
                ---
                """;
        var prompt = $"""
            Actual program output:
            ---
            {Truncate(stdout, 4000)}
            ---
            {stderrSection}
            Expectations to verify:
            {expectationList}
            Does the output satisfy all expectations?
            """;

        try
        {
            var response = await agent.RunAsync<AIVerificationResponse>(prompt);
            var result = response.Result;
            if (result is null)
            {
                return ($"AI verification returned null result. Raw: {response.Text}", ["AI verification returned null result."]);
            }

            var reasoning = string.IsNullOrWhiteSpace(result.AIReasoning)
                ? "(no reasoning provided)"
                : result.AIReasoning;

            // Collect unmet expectations as individual failures.
            var unmet = new List<string>();
            if (result.ExpectationResults is { Count: > 0 } perExpectation)
            {
                foreach (var entry in perExpectation.Where(r => !r.Met))
                {
                    unmet.Add(DescribeUnmet(entry));
                }
            }

            // If the model flagged overall failure but reported no unmet expectation (or no
            // per-expectation detail at all), still treat it as a failure using the overall reasoning.
            if (unmet.Count == 0 && !result.Pass)
            {
                unmet.Add(reasoning);
            }

            // Fail closed: the agent is instructed to return exactly one entry per expectation.
            // If it returned fewer, some expectations were never assessed and must not pass silently.
            int reported = result.ExpectationResults?.Count ?? 0;
            if (reported < expectations.Length)
            {
                unmet.Add($"AI verifier returned {reported} expectation result(s) for {expectations.Length} expectation(s).");
            }

            return (reasoning, unmet);
        }
        catch (Exception ex)
        {
            // Deliberately broad: a failed agent call is reported as a verification failure
            // for this sample rather than aborting the caller.
            return ($"AI verification error: {ex.Message}", [$"AI verification error: {ex.Message}"]);
        }
    }

    /// <summary>
    /// Formats an unmet expectation as "expectation — detail", tolerating blank fields from the model.
    /// </summary>
    private static string DescribeUnmet(ExpectationResult entry)
    {
        // The properties default to empty strings, so a null check alone would let a blank message through.
        var expectation = string.IsNullOrWhiteSpace(entry.Expectation) ? "Unknown expectation" : entry.Expectation;
        return string.IsNullOrWhiteSpace(entry.Detail) ? expectation : $"{expectation} — {entry.Detail}";
    }

    /// <summary>
    /// Returns <paramref name="text"/> unchanged when it fits in <paramref name="maxLength"/> characters;
    /// otherwise returns its leading characters followed by "... (truncated)".
    /// </summary>
    private static string Truncate(string text, int maxLength)
    {
        if (text.Length <= maxLength)
        {
            return text;
        }

        int cut = maxLength;

        // Do not cut between the two halves of a UTF-16 surrogate pair; that would leave a lone
        // high surrogate at the end of the kept text.
        if (cut > 0 && char.IsHighSurrogate(text[cut - 1]) && char.IsLowSurrogate(text[cut]))
        {
            cut--;
        }

        return text[..cut] + "... (truncated)";
    }
}
/// <summary>
/// Structured response from the AI verification agent.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1812:Avoid uninstantiated internal classes", Justification = "Instantiated by JSON deserialization via RunAsync.")]
internal sealed class AIVerificationResponse
{
    /// <summary>Whether all expectations were met.</summary>
    [JsonPropertyName("pass")]
    [Description("Always required. True only when every expectation is met.")]
    public bool Pass { get; set; }

    /// <summary>Brief explanation of the overall assessment.</summary>
    [JsonPropertyName("ai_reasoning")]
    [Description("Always required. Brief explanation of the overall assessment, covering all expectations.")]
    public string AIReasoning { get; set; } = string.Empty;

    /// <summary>Per-expectation results.</summary>
    [JsonPropertyName("expectation_results")]
    [Description("Always required. One entry per expectation, in the same order as the input list.")]
    public List<ExpectationResult> ExpectationResults { get; set; } = [];
}
/// <summary>
/// Outcome reported by the verification agent for a single expectation.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage(
    "Performance",
    "CA1812:Avoid uninstantiated internal classes",
    Justification = "Instantiated by JSON deserialization via RunAsync.")]
internal sealed class ExpectationResult
{
    /// <summary>Text of the expectation this entry refers to (JSON field <c>expectation</c>).</summary>
    [JsonPropertyName("expectation"), Description("Echo back the expectation text being evaluated.")]
    public string Expectation { get; set; } = "";

    /// <summary>True when the expectation was satisfied (JSON field <c>met</c>).</summary>
    [JsonPropertyName("met")]
    public bool Met { get; set; }

    /// <summary>Explanation of why the expectation was or was not satisfied (JSON field <c>detail</c>).</summary>
    [JsonPropertyName("detail"), Description("Explain how the expectation was or was not met, citing specific evidence from the output.")]
    public string Detail { get; set; } = "";
}