From 6d6cb840aec8b85c6bb5e95dc680c8fdd6110394 Mon Sep 17 00:00:00 2001
From: westey <164392973+westey-m@users.noreply.github.com>
Date: Thu, 9 Apr 2026 12:25:00 +0100
Subject: [PATCH] .NET: Improve resilience of verify-samples by building
separately and improving evaluation instructions (#5151)
* Improve resilience of verify-samples by building separately and improving evaluation instructions
* Address PR comments
* Address PR comment
---
.github/workflows/dotnet-verify-samples.yml | 5 ++
.../skills/verify-samples-tool/SKILL.md | 11 +++++
dotnet/eng/verify-samples/Program.cs | 5 +-
dotnet/eng/verify-samples/SampleRunner.cs | 13 ++++-
dotnet/eng/verify-samples/SampleVerifier.cs | 49 ++++++++++++++-----
.../VerificationOrchestrator.cs | 9 ++--
dotnet/eng/verify-samples/VerifyOptions.cs | 20 ++++++++
7 files changed, 95 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/dotnet-verify-samples.yml b/.github/workflows/dotnet-verify-samples.yml
index ad384eb83e..b1c13a275f 100644
--- a/.github/workflows/dotnet-verify-samples.yml
+++ b/.github/workflows/dotnet-verify-samples.yml
@@ -63,6 +63,11 @@ jobs:
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+ - name: Build solution
+ working-directory: dotnet
+ shell: bash
+ run: dotnet build agent-framework-dotnet.slnx -f net10.0 --warnaserror
+
- name: Run verify-samples
id: verify
working-directory: dotnet
diff --git a/dotnet/.github/skills/verify-samples-tool/SKILL.md b/dotnet/.github/skills/verify-samples-tool/SKILL.md
index cbb1b35009..4d4f153bfd 100644
--- a/dotnet/.github/skills/verify-samples-tool/SKILL.md
+++ b/dotnet/.github/skills/verify-samples-tool/SKILL.md
@@ -9,9 +9,16 @@ The `verify-samples` project (`dotnet/eng/verify-samples/`) is an automated tool
## Running verify-samples
+**Important:** By default, samples must be pre-built before running verify-samples. Build the solution first, or pass `--build` to build samples during the run:
+
```bash
cd dotnet
+dotnet build agent-framework-dotnet.slnx -f net10.0
+```
+Then run verify-samples:
+
+```bash
# Run all samples across all categories
dotnet run --project eng/verify-samples -- --log results.log --csv results.csv
@@ -24,6 +31,10 @@ dotnet run --project eng/verify-samples -- Agent_Step02_StructuredOutput Agent_S
# Control parallelism (default 8)
dotnet run --project eng/verify-samples -- --parallel 8 --log results.log
+# Build samples during the run (no prior build step needed)
+# Caution: samples are built in parallel, which may cause build conflicts
+dotnet run --project eng/verify-samples -- --build --log results.log
+
# Combine options
dotnet run --project eng/verify-samples -- --category 03-workflows --parallel 4 --log results.log --csv results.csv --md results.md
```
diff --git a/dotnet/eng/verify-samples/Program.cs b/dotnet/eng/verify-samples/Program.cs
index 7f27d37dd5..ebddc4b16b 100644
--- a/dotnet/eng/verify-samples/Program.cs
+++ b/dotnet/eng/verify-samples/Program.cs
@@ -14,6 +14,9 @@
// dotnet run -- --log results.log # Write sequential log to file
// dotnet run -- --csv results.csv # Write CSV summary to file
// dotnet run -- --md results.md # Write Markdown summary to file
+// dotnet run -- --build # Build samples during run (default: --no-build)
+// Note: By default, this tool expects sample build outputs to already exist.
+// Either pre-build the solution before running or pass --build; otherwise samples fail with missing build output.
//
// Required environment variables (for AI-powered samples):
// AZURE_OPENAI_ENDPOINT
@@ -63,7 +66,7 @@ try
// Run all samples
var reporter = new ConsoleReporter();
var verifier = new SampleVerifier(chatClient);
- var orchestrator = new VerificationOrchestrator(verifier, reporter, dotnetRoot, TimeSpan.FromMinutes(3), logWriter);
+ var orchestrator = new VerificationOrchestrator(verifier, reporter, dotnetRoot, TimeSpan.FromMinutes(3), logWriter, buildSamples: options.BuildSamples);
var run = await orchestrator.RunAllAsync(options.Samples, options.MaxParallelism);
diff --git a/dotnet/eng/verify-samples/SampleRunner.cs b/dotnet/eng/verify-samples/SampleRunner.cs
index 0fabd82262..f8bd3cc0e6 100644
--- a/dotnet/eng/verify-samples/SampleRunner.cs
+++ b/dotnet/eng/verify-samples/SampleRunner.cs
@@ -20,23 +20,32 @@ internal static class SampleRunner
{
///
/// Runs dotnet run --framework net10.0 in the given project directory.
+ /// When <paramref name="build"/> is <c>false</c> (the default), <c>--no-build</c> is passed
+ /// to skip building, assuming the project was pre-built.
///
public static Task RunAsync(
string projectPath,
TimeSpan timeout,
+ bool build = false,
CancellationToken cancellationToken = default)
- => RunAsync(projectPath, "run --framework net10.0", timeout, inputs: null, inputDelayMs: 0, cancellationToken: cancellationToken);
+ => RunAsync(projectPath, DotnetRunArgs(build), timeout, inputs: null, inputDelayMs: 0, cancellationToken: cancellationToken);
///
/// Runs dotnet run --framework net10.0 with stdin inputs.
+ /// When <paramref name="build"/> is <c>false</c> (the default), <c>--no-build</c> is passed
+ /// to skip building, assuming the project was pre-built.
///
public static Task RunAsync(
string projectPath,
TimeSpan timeout,
string?[]? inputs,
int inputDelayMs = 2000,
+ bool build = false,
CancellationToken cancellationToken = default)
- => RunAsync(projectPath, "run --framework net10.0", timeout, inputs, inputDelayMs, cancellationToken);
+ => RunAsync(projectPath, DotnetRunArgs(build), timeout, inputs, inputDelayMs, cancellationToken);
+
+ private static string DotnetRunArgs(bool build) =>
+ $"run {(build ? "" : "--no-build")} --framework net10.0";
///
/// Runs an arbitrary dotnet command in the given working directory.
diff --git a/dotnet/eng/verify-samples/SampleVerifier.cs b/dotnet/eng/verify-samples/SampleVerifier.cs
index 9dc17b1769..ae28aa835f 100644
--- a/dotnet/eng/verify-samples/SampleVerifier.cs
+++ b/dotnet/eng/verify-samples/SampleVerifier.cs
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.
+using System.ComponentModel;
using System.Text.Json.Serialization;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
@@ -27,11 +28,19 @@ internal sealed class SampleVerifier
instructions: """
You are a test output verifier. You will be given:
1. The actual stdout output of a program
- 2. A list of expectations about what the output should contain or demonstrate
+ 2. The stderr output (if any)
+ 3. A list of expectations about what the output should contain or demonstrate
Your job is to determine whether the actual output satisfies each expectation.
Be reasonable — the output comes from an LLM so exact wording won't match, but the
semantic intent should be clearly satisfied.
+
+ In your response, you MUST:
+ - Always provide ai_reasoning with a brief overall assessment.
+ - Always provide exactly one entry in expectation_results for each expectation,
+ in the same order as the input list.
+ - For each expectation_results entry, echo the expectation text in the expectation
+ field and explain your assessment in the detail field, citing evidence from the output.
""",
name: "OutputVerifier");
}
@@ -78,7 +87,7 @@ internal sealed class SampleVerifier
}
else
{
- var aiResult = await this.VerifyWithAIAsync(run.Stdout, sample.ExpectedOutputDescription);
+ var aiResult = await this.VerifyWithAIAsync(run.Stdout, run.Stderr, sample.ExpectedOutputDescription);
aiReasoning = aiResult.Reasoning;
foreach (var unmet in aiResult.UnmetExpectations)
@@ -100,16 +109,28 @@ internal sealed class SampleVerifier
}
private async Task<(string Reasoning, List UnmetExpectations)> VerifyWithAIAsync(
- string actualOutput,
+ string stdout,
+ string stderr,
string[] expectations)
{
var expectationList = string.Join("\n", expectations.Select((e, i) => $" {i + 1}. {e}"));
+
+ var stderrSection = string.IsNullOrWhiteSpace(stderr)
+ ? ""
+ : $"""
+
+ Stderr output:
+ ---
+ {Truncate(stderr, 2000)}
+ ---
+ """;
+
var prompt = $"""
Actual program output:
---
- {Truncate(actualOutput, 4000)}
+ {Truncate(stdout, 4000)}
---
-
+ {stderrSection}
Expectations to verify:
{expectationList}
@@ -126,7 +147,9 @@ internal sealed class SampleVerifier
return ($"AI verification returned null result. Raw: {response.Text}", ["AI verification returned null result."]);
}
- var reasoning = result.Reasoning ?? "(no reasoning provided)";
+ var reasoning = string.IsNullOrWhiteSpace(result.AIReasoning)
+ ? "(no reasoning provided)"
+ : result.AIReasoning;
// Collect unmet expectations as individual failures
var unmet = new List();
@@ -174,12 +197,14 @@ internal sealed class AIVerificationResponse
public bool Pass { get; set; }
/// Brief explanation of the overall assessment.
- [JsonPropertyName("reasoning")]
- public string? Reasoning { get; set; }
+ [JsonPropertyName("ai_reasoning")]
+ [Description("Always required. Brief explanation of the overall assessment, covering all expectations.")]
+ public string AIReasoning { get; set; } = string.Empty;
/// Per-expectation results.
[JsonPropertyName("expectation_results")]
- public List? ExpectationResults { get; set; }
+ [Description("Always required. One entry per expectation, in the same order as the input list.")]
+ public List ExpectationResults { get; set; } = [];
}
///
@@ -190,7 +215,8 @@ internal sealed class ExpectationResult
{
/// The expectation text that was evaluated.
[JsonPropertyName("expectation")]
- public string? Expectation { get; set; }
+ [Description("Echo back the expectation text being evaluated.")]
+ public string Expectation { get; set; } = string.Empty;
/// Whether this expectation was met.
[JsonPropertyName("met")]
@@ -198,5 +224,6 @@ internal sealed class ExpectationResult
/// Detail about how the expectation was or was not met.
[JsonPropertyName("detail")]
- public string? Detail { get; set; }
+ [Description("Explain how the expectation was or was not met, citing specific evidence from the output.")]
+ public string Detail { get; set; } = string.Empty;
}
diff --git a/dotnet/eng/verify-samples/VerificationOrchestrator.cs b/dotnet/eng/verify-samples/VerificationOrchestrator.cs
index 1ce805bc5a..b55efc9c14 100644
--- a/dotnet/eng/verify-samples/VerificationOrchestrator.cs
+++ b/dotnet/eng/verify-samples/VerificationOrchestrator.cs
@@ -14,19 +14,22 @@ internal sealed class VerificationOrchestrator
private readonly LogFileWriter? _logWriter;
private readonly string _dotnetRoot;
private readonly TimeSpan _timeout;
+ private readonly bool _buildSamples;
public VerificationOrchestrator(
SampleVerifier verifier,
ConsoleReporter reporter,
string dotnetRoot,
TimeSpan timeout,
- LogFileWriter? logWriter = null)
+ LogFileWriter? logWriter = null,
+ bool buildSamples = false)
{
this._verifier = verifier;
this._reporter = reporter;
this._logWriter = logWriter;
this._dotnetRoot = dotnetRoot;
this._timeout = timeout;
+ this._buildSamples = buildSamples;
}
///
@@ -136,8 +139,8 @@ internal sealed class VerificationOrchestrator
var projectPath = Path.Combine(this._dotnetRoot, sample.ProjectPath);
var run = sample.Inputs.Length > 0
- ? await SampleRunner.RunAsync(projectPath, this._timeout, sample.Inputs, sample.InputDelayMs)
- : await SampleRunner.RunAsync(projectPath, this._timeout);
+ ? await SampleRunner.RunAsync(projectPath, this._timeout, sample.Inputs, sample.InputDelayMs, build: this._buildSamples)
+ : await SampleRunner.RunAsync(projectPath, this._timeout, build: this._buildSamples);
log.Add($"[{sample.Name}] Completed ({run.Elapsed.TotalSeconds:F1}s, exit={run.ExitCode})");
this._reporter.WriteLineWithPrefix(
diff --git a/dotnet/eng/verify-samples/VerifyOptions.cs b/dotnet/eng/verify-samples/VerifyOptions.cs
index 78ba38acf1..95e0af8795 100644
--- a/dotnet/eng/verify-samples/VerifyOptions.cs
+++ b/dotnet/eng/verify-samples/VerifyOptions.cs
@@ -27,6 +27,12 @@ internal sealed class VerifyOptions
///
public string? LogFilePath { get; init; }
+ ///
+ /// When true, samples are built as part of dotnet run.
+ /// When false (the default), --no-build is passed, assuming a prior build step.
+ ///
+ public bool BuildSamples { get; init; }
+
///
/// The filtered list of samples to process.
///
@@ -55,6 +61,7 @@ internal sealed class VerifyOptions
var logFilePath = ExtractArg(argList, "--log");
var csvFilePath = ExtractArg(argList, "--csv");
var markdownFilePath = ExtractArg(argList, "--md");
+ var buildSamples = ExtractFlag(argList, "--build");
int maxParallelism = 8;
var parallelArg = ExtractArg(argList, "--parallel");
@@ -105,6 +112,7 @@ internal sealed class VerifyOptions
LogFilePath = logFilePath,
CsvFilePath = csvFilePath,
MarkdownFilePath = markdownFilePath,
+ BuildSamples = buildSamples,
Samples = samples,
};
}
@@ -128,4 +136,16 @@ internal sealed class VerifyOptions
list.RemoveRange(idx, 2);
return value;
}
+
+ private static bool ExtractFlag(List list, string flag)
+ {
+ var idx = list.IndexOf(flag);
+ if (idx < 0)
+ {
+ return false;
+ }
+
+ list.RemoveAt(idx);
+ return true;
+ }
}