mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Merge branch 'main' into dev/dotnet_workflow/mark_magentic_experimental
This commit is contained in:
@@ -273,6 +273,8 @@ jobs:
|
||||
-c ${{ matrix.configuration }} `
|
||||
--no-build -v Normal `
|
||||
--report-xunit-trx `
|
||||
--report-junit `
|
||||
--results-directory ../IntegrationTestResults/ `
|
||||
--ignore-exit-code 8 `
|
||||
--filter-not-trait "Category=IntegrationDisabled" `
|
||||
--filter-not-trait "Category=FoundryHostedAgents" `
|
||||
@@ -294,6 +296,10 @@ jobs:
|
||||
AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
|
||||
AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZURE_AI_MODEL_DEPLOYMENT_NAME }}
|
||||
AZURE_AI_BING_CONNECTION_ID: ${{ vars.AZURE_AI_BING_CONNECTION_ID }}
|
||||
# Anthropic Models
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
ANTHROPIC_CHAT_MODEL_NAME: ${{ vars.ANTHROPIC_CHAT_MODEL_NAME }}
|
||||
ANTHROPIC_REASONING_MODEL_NAME: ${{ vars.ANTHROPIC_REASONING_MODEL_NAME }}
|
||||
|
||||
# Generate test reports and check coverage
|
||||
- name: Generate test reports
|
||||
@@ -316,6 +322,14 @@ jobs:
|
||||
shell: pwsh
|
||||
run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath "TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD
|
||||
|
||||
- name: Upload integration test results
|
||||
if: always() && github.event_name != 'pull_request' && matrix.integration-tests
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
|
||||
path: IntegrationTestResults/**/*.junit
|
||||
if-no-files-found: ignore
|
||||
|
||||
# The Foundry hosted-agent IT is costly (it builds a container, pushes to ACR, and provisions
|
||||
# live agents on a separate Foundry project). Running it in its own job keeps the overall
|
||||
# workflow time roughly flat: it executes in parallel to dotnet-build and dotnet-test and is
|
||||
@@ -456,3 +470,64 @@ jobs:
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: core.setFailed('Integration Tests Cancelled!')
|
||||
|
||||
# Integration test trend report (aggregates JUnit XML results from dotnet test jobs)
|
||||
dotnet-integration-test-report:
|
||||
name: Integration Test Report
|
||||
if: >
|
||||
always() &&
|
||||
github.event_name != 'pull_request' &&
|
||||
(contains(join(needs.*.result, ','), 'success') ||
|
||||
contains(join(needs.*.result, ','), 'failure'))
|
||||
needs: [dotnet-test]
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: python
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
sparse-checkout: |
|
||||
.github/actions/python-setup
|
||||
python
|
||||
- name: Set up python and install the project
|
||||
uses: ./.github/actions/python-setup
|
||||
with:
|
||||
python-version: "3.13"
|
||||
os: ${{ runner.os }}
|
||||
- name: Download all test results from current run
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
pattern: dotnet-test-results-*
|
||||
path: dotnet-test-results/
|
||||
- name: Restore report history cache
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: python/dotnet-integration-report-history.json
|
||||
key: dotnet-integration-report-history-${{ github.run_id }}
|
||||
restore-keys: |
|
||||
dotnet-integration-report-history-
|
||||
- name: Generate trend report
|
||||
run: >
|
||||
uv run python scripts/integration_test_report/aggregate.py
|
||||
../dotnet-test-results/
|
||||
dotnet-integration-report-history.json
|
||||
dotnet-integration-test-report.md
|
||||
- name: Post to Job Summary
|
||||
if: always()
|
||||
run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY
|
||||
- name: Save report history cache
|
||||
if: always()
|
||||
uses: actions/cache/save@v4
|
||||
with:
|
||||
path: python/dotnet-integration-report-history.json
|
||||
key: dotnet-integration-report-history-${{ github.run_id }}
|
||||
- name: Upload trend report
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: dotnet-integration-test-report
|
||||
path: |
|
||||
python/dotnet-integration-test-report.md
|
||||
python/dotnet-integration-report-history.json
|
||||
|
||||
@@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
|
||||
- [Design Documents](../docs/design)
|
||||
- [Architectural Decision Records](../docs/decisions)
|
||||
- [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)
|
||||
|
||||
|
||||
@@ -13,5 +13,5 @@ internal sealed class SequenceNumber
|
||||
/// Gets the next sequence number.
|
||||
/// </summary>
|
||||
/// <returns>The next sequence number.</returns>
|
||||
public int Increment() => this._sequenceNumber++;
|
||||
public int Increment() => System.Threading.Interlocked.Increment(ref this._sequenceNumber) - 1;
|
||||
}
|
||||
|
||||
+16
-4
@@ -17,9 +17,6 @@ namespace AnthropicChatCompletion.IntegrationTests;
|
||||
|
||||
public class AnthropicChatCompletionFixture : IChatClientAgentFixture
|
||||
{
|
||||
// All tests for Anthropic are intended to be run locally, as the CI pipeline for Anthropic is not set up.
|
||||
internal const string SkipReason = "Integrations tests for local execution only";
|
||||
|
||||
private readonly bool _useReasoningModel;
|
||||
private readonly bool _useBeta;
|
||||
|
||||
@@ -105,7 +102,22 @@ public class AnthropicChatCompletionFixture : IChatClientAgentFixture
|
||||
|
||||
public async ValueTask InitializeAsync()
|
||||
{
|
||||
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
|
||||
// Temporarily disabled: Anthropic SDK has a binary incompatibility with the current
|
||||
// Microsoft.Extensions.AI version (WebSearchToolResultContent.Results method not found).
|
||||
// See: https://github.com/microsoft/agent-framework/pull/5515
|
||||
Assert.Skip("Anthropic integration tests temporarily disabled due to SDK incompatibility with Microsoft.Extensions.AI");
|
||||
|
||||
try
|
||||
{
|
||||
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey);
|
||||
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
|
||||
_ = TestConfiguration.GetRequiredValue(TestSettings.AnthropicReasoningModelName);
|
||||
}
|
||||
catch (InvalidOperationException ex)
|
||||
{
|
||||
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
|
||||
}
|
||||
|
||||
this._agent = await this.CreateChatClientAgentAsync();
|
||||
}
|
||||
|
||||
|
||||
+28
-12
@@ -1,5 +1,6 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using AgentConformance.IntegrationTests.Support;
|
||||
using Anthropic;
|
||||
@@ -17,19 +18,28 @@ namespace AnthropicChatCompletion.IntegrationTests;
|
||||
/// Integration tests for Anthropic Skills functionality.
|
||||
/// These tests are designed to be run locally with a valid Anthropic API key.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Temporarily disabled due to Anthropic SDK binary incompatibility with
|
||||
/// the current Microsoft.Extensions.AI version (WebSearchToolResultContent.Results).
|
||||
/// </remarks>
|
||||
[Trait("Category", "IntegrationDisabled")]
|
||||
public sealed class AnthropicSkillsIntegrationTests
|
||||
{
|
||||
// All tests for Anthropic are intended to be run locally, as the CI pipeline for Anthropic is not set up.
|
||||
private const string SkipReason = "Integrations tests for local execution only";
|
||||
|
||||
[Fact]
|
||||
public async Task CreateAgentWithPptxSkillAsync()
|
||||
{
|
||||
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
|
||||
|
||||
// Arrange
|
||||
AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
|
||||
string model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
|
||||
AnthropicClient? anthropicClient;
|
||||
string? model;
|
||||
try
|
||||
{
|
||||
anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
|
||||
model = TestConfiguration.GetRequiredValue(TestSettings.AnthropicChatModelName);
|
||||
}
|
||||
catch (InvalidOperationException ex)
|
||||
{
|
||||
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
|
||||
return;
|
||||
}
|
||||
|
||||
BetaSkillParams pptxSkill = new()
|
||||
{
|
||||
@@ -56,10 +66,16 @@ public sealed class AnthropicSkillsIntegrationTests
|
||||
[Fact]
|
||||
public async Task ListAnthropicManagedSkillsAsync()
|
||||
{
|
||||
Assert.SkipWhen(SkipReason is not null, SkipReason ?? string.Empty);
|
||||
|
||||
// Arrange
|
||||
AnthropicClient anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
|
||||
AnthropicClient? anthropicClient;
|
||||
try
|
||||
{
|
||||
anthropicClient = new() { ApiKey = TestConfiguration.GetRequiredValue(TestSettings.AnthropicApiKey) };
|
||||
}
|
||||
catch (InvalidOperationException ex)
|
||||
{
|
||||
Assert.Skip("Anthropic configuration could not be loaded. Error:" + ex.Message);
|
||||
return;
|
||||
}
|
||||
|
||||
// Act
|
||||
SkillListPage skills = await anthropicClient.Beta.Skills.List(
|
||||
|
||||
+15
-27
@@ -13,8 +13,6 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
|
||||
[Trait("Category", "SampleValidation")]
|
||||
public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper) : SamplesValidationBase(outputHelper)
|
||||
{
|
||||
private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
|
||||
|
||||
private static readonly string s_samplesPath = Path.GetFullPath(
|
||||
Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "..", "..", "samples", "04-hosting", "DurableAgents", "ConsoleApps"));
|
||||
|
||||
@@ -69,7 +67,7 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SingleAgentOrchestrationChainingSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
|
||||
@@ -105,7 +103,7 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task MultiAgentConcurrencySampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
|
||||
@@ -160,7 +158,7 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task MultiAgentConditionalSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
|
||||
@@ -237,14 +235,14 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
Assert.True(foundSuccess, "Orchestration did not complete successfully.");
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SingleAgentOrchestrationHITLSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "05_AgentOrchestration_HITL");
|
||||
|
||||
await this.RunSampleTestAsync(samplePath, async (process, logs) =>
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts();
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));
|
||||
|
||||
// Start the HITL orchestration following the happy path from README
|
||||
await this.WriteInputAsync(process, "The Future of Artificial Intelligence", testTimeoutCts.Token);
|
||||
@@ -260,7 +258,7 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
|
||||
{
|
||||
// Look for notification that content is ready. The first time we see this, we should send a rejection.
|
||||
// The second time we see this, we should send approval.
|
||||
// Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
|
||||
if (line.Contains("Content is ready for review", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
if (!rejectionSent)
|
||||
@@ -275,20 +273,15 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
testTimeoutCts.Token);
|
||||
rejectionSent = true;
|
||||
}
|
||||
else if (!approvalSent)
|
||||
else
|
||||
{
|
||||
// Prompt: Approve? (y/n):
|
||||
// Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
|
||||
await this.WriteInputAsync(process, "y", testTimeoutCts.Token);
|
||||
|
||||
// Prompt: Feedback (optional):
|
||||
await this.WriteInputAsync(process, "Looks good!", testTimeoutCts.Token);
|
||||
approvalSent = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// This should never happen
|
||||
Assert.Fail("Unexpected message found.");
|
||||
}
|
||||
}
|
||||
|
||||
// Look for success message
|
||||
@@ -311,14 +304,14 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task LongRunningToolsSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
|
||||
await this.RunSampleTestAsync(samplePath, async (process, logs) =>
|
||||
{
|
||||
// This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(180));
|
||||
|
||||
// Test starting an agent that schedules a content generation orchestration
|
||||
await this.WriteInputAsync(
|
||||
@@ -335,7 +328,7 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
while ((line = this.ReadLogLine(logs, testTimeoutCts.Token)) != null)
|
||||
{
|
||||
// Look for notification that content is ready. The first time we see this, we should send a rejection.
|
||||
// The second time we see this, we should send approval.
|
||||
// Subsequent times we see this, we should send approval (LLM may produce extra review cycles).
|
||||
if (line.Contains("NOTIFICATION: Please review the following content for approval", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// Wait for the notification to be fully written to the console
|
||||
@@ -350,20 +343,15 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
testTimeoutCts.Token);
|
||||
rejectionSent = true;
|
||||
}
|
||||
else if (!approvalSent)
|
||||
else
|
||||
{
|
||||
// Approve the content. Note that we need to send a newline character to the console first before sending the input.
|
||||
// Approve any subsequent draft (LLM non-determinism may produce extra review cycles)
|
||||
await this.WriteInputAsync(
|
||||
process,
|
||||
"\nApprove the content",
|
||||
testTimeoutCts.Token);
|
||||
approvalSent = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// This should never happen
|
||||
Assert.Fail("Unexpected message found.");
|
||||
}
|
||||
}
|
||||
|
||||
// Look for success message
|
||||
@@ -396,14 +384,14 @@ public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task ReliableStreamingSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "07_ReliableStreaming");
|
||||
await this.RunSampleTestAsync(samplePath, async (process, logs) =>
|
||||
{
|
||||
// This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(150));
|
||||
|
||||
// Test the agent endpoint with a simple prompt
|
||||
await this.WriteInputAsync(process, "Plan a 5-day trip to Seattle. Include daily activities.", testTimeoutCts.Token);
|
||||
|
||||
+4
-6
@@ -19,11 +19,9 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
|
||||
[Trait("Category", "Integration")]
|
||||
public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDisposable
|
||||
{
|
||||
private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
|
||||
|
||||
private static readonly TimeSpan s_defaultTimeout = Debugger.IsAttached
|
||||
? TimeSpan.FromMinutes(5)
|
||||
: TimeSpan.FromSeconds(60);
|
||||
: TimeSpan.FromSeconds(120);
|
||||
|
||||
private static readonly IConfiguration s_configuration =
|
||||
new ConfigurationBuilder()
|
||||
@@ -38,7 +36,7 @@ public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDispo
|
||||
|
||||
public void Dispose() => this._cts.Dispose();
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SimplePromptAsync()
|
||||
{
|
||||
// Setup
|
||||
@@ -77,7 +75,7 @@ public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDispo
|
||||
Assert.Contains(agentLogs, log => log.EventId.Name == "LogAgentResponse");
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task CallFunctionToolsAsync()
|
||||
{
|
||||
int weatherToolInvocationCount = 0;
|
||||
@@ -129,7 +127,7 @@ public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDispo
|
||||
Assert.Equal(1, packingListToolInvocationCount);
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task CallLongRunningFunctionToolsAsync()
|
||||
{
|
||||
[Description("Starts a greeting workflow and returns the workflow instance ID")]
|
||||
|
||||
+1
-1
@@ -217,7 +217,7 @@ public abstract class SamplesValidationBase : IAsyncLifetime
|
||||
/// </summary>
|
||||
protected CancellationTokenSource CreateTestTimeoutCts(TimeSpan? timeout = null)
|
||||
{
|
||||
TimeSpan testTimeout = Debugger.IsAttached ? TimeSpan.FromMinutes(5) : timeout ?? TimeSpan.FromSeconds(60);
|
||||
TimeSpan testTimeout = Debugger.IsAttached ? TimeSpan.FromMinutes(5) : timeout ?? TimeSpan.FromSeconds(120);
|
||||
return new CancellationTokenSource(testTimeout);
|
||||
}
|
||||
|
||||
|
||||
+8
-8
@@ -22,7 +22,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
/// <inheritdoc />
|
||||
protected override string TaskHubPrefix => "workflow";
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SequentialWorkflowSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -71,7 +71,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task ConcurrentWorkflowSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -120,7 +120,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task ConditionalEdgesWorkflowSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -182,7 +182,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task WorkflowEventsSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -278,7 +278,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task WorkflowSharedStateSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -376,7 +376,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SubWorkflowsSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -452,7 +452,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task WorkflowHITLSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
@@ -505,7 +505,7 @@ public sealed class WorkflowConsoleAppSamplesValidation(ITestOutputHelper output
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task WorkflowAndAgentsSampleValidationAsync()
|
||||
{
|
||||
using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(s_testTimeout);
|
||||
|
||||
+12
-14
@@ -15,8 +15,6 @@ namespace Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests;
|
||||
[Trait("Category", "SampleValidation")]
|
||||
public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLifetime
|
||||
{
|
||||
private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
|
||||
|
||||
private const string AzureFunctionsPort = "7071";
|
||||
private const string AzuritePort = "10000";
|
||||
private const string DtsPort = "8080";
|
||||
@@ -37,7 +35,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
.Build();
|
||||
|
||||
private static bool s_infrastructureStarted;
|
||||
private static readonly TimeSpan s_orchestrationTimeout = TimeSpan.FromMinutes(2);
|
||||
private static readonly TimeSpan s_orchestrationTimeout = TimeSpan.FromMinutes(3);
|
||||
|
||||
// In CI, `dotnet run` builds the Functions project from scratch before the host starts, so 60s is not enough.
|
||||
private static readonly TimeSpan s_functionsReadyTimeout = TimeSpan.FromSeconds(180);
|
||||
@@ -62,7 +60,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
await Task.CompletedTask;
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SingleAgentSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "01_SingleAgent");
|
||||
@@ -107,7 +105,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[Fact(Skip = "Flaky: LLM non-determinism can produce null orchestration results")]
|
||||
public async Task SingleAgentOrchestrationChainingSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "02_AgentOrchestration_Chaining");
|
||||
@@ -150,7 +148,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task MultiAgentOrchestrationConcurrentSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "03_AgentOrchestration_Concurrency");
|
||||
@@ -200,7 +198,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task MultiAgentOrchestrationConditionalsSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "04_AgentOrchestration_Conditionals");
|
||||
@@ -218,7 +216,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task SingleAgentOrchestrationHITLSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "05_AgentOrchestration_HITL");
|
||||
@@ -274,7 +272,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task LongRunningToolsSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
|
||||
@@ -316,7 +314,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
}
|
||||
},
|
||||
message: "Orchestration is requesting human feedback",
|
||||
timeout: TimeSpan.FromSeconds(60));
|
||||
timeout: TimeSpan.FromSeconds(180));
|
||||
|
||||
// Approve the content
|
||||
Uri approvalUri = new($"{runAgentUri}?thread_id={sessionId}");
|
||||
@@ -336,7 +334,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
}
|
||||
},
|
||||
message: "Content published notification is logged",
|
||||
timeout: TimeSpan.FromSeconds(60));
|
||||
timeout: TimeSpan.FromSeconds(180));
|
||||
|
||||
// Verify the final orchestration status by asking the agent for the status
|
||||
Uri statusUri = new($"{runAgentUri}?thread_id={sessionId}");
|
||||
@@ -360,11 +358,11 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
return isCompleted && hasContent;
|
||||
},
|
||||
message: "Orchestration is completed",
|
||||
timeout: TimeSpan.FromSeconds(60));
|
||||
timeout: TimeSpan.FromSeconds(180));
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task AgentAsMcpToolAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "07_AgentAsMcpTool");
|
||||
@@ -404,7 +402,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
|
||||
});
|
||||
}
|
||||
|
||||
[Fact(Skip = SkipFlakyTimingTest)]
|
||||
[RetryFact(2, 5000)]
|
||||
public async Task ReliableStreamingSampleValidationAsync()
|
||||
{
|
||||
string samplePath = Path.Combine(s_samplesPath, "08_ReliableStreaming");
|
||||
|
||||
@@ -2,16 +2,18 @@
|
||||
|
||||
"""Aggregate per-provider JUnit XML test results and generate a trend report.
|
||||
|
||||
Parses ``pytest.xml`` (JUnit XML) files produced by each CI job, merges them
|
||||
into a single run, combines with historical data, and generates a markdown
|
||||
trend table — the same pattern used by ``scripts/sample_validation/aggregate.py``.
|
||||
Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and
|
||||
xunit v3 ``*.junit`` (dotnet) — merges them into a single run, combines
|
||||
with historical data, and generates a markdown trend table.
|
||||
|
||||
Usage (from CI):
|
||||
python aggregate.py <reports-dir> <history-file> <output-file>
|
||||
|
||||
The reports directory is expected to contain subdirectories named
|
||||
``test-results-<provider>/`` each containing a ``pytest.xml`` file
|
||||
(created by ``actions/download-artifact``).
|
||||
The reports directory is expected to contain artifact subdirectories. Two
|
||||
layouts are supported:
|
||||
|
||||
- **Python (pytest):** ``test-results-<provider>/pytest.xml``
|
||||
- **Dotnet (xunit):** ``dotnet-test-results-<tfm>-<os>/*.junit``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -46,9 +48,21 @@ def _format_run_label(timestamp: str) -> str:
|
||||
def _derive_provider(directory_name: str) -> str:
|
||||
"""Derive a provider label from a report directory name.
|
||||
|
||||
``test-results-openai`` → ``OpenAI``
|
||||
``test-results-azure-openai`` → ``Azure OpenAI``
|
||||
Handles both Python and dotnet naming conventions:
|
||||
- ``test-results-openai`` → ``OpenAI``
|
||||
- ``test-results-azure-openai`` → ``Azure OpenAI``
|
||||
- ``dotnet-test-results-net10.0-ubuntu-latest`` → ``net10.0 (ubuntu)``
|
||||
"""
|
||||
# Dotnet convention: dotnet-test-results-<framework>-<os>
|
||||
if directory_name.startswith("dotnet-test-results-"):
|
||||
raw = directory_name.replace("dotnet-test-results-", "")
|
||||
# e.g. "net10.0-ubuntu-latest" → framework="net10.0", os="ubuntu-latest"
|
||||
parts = raw.split("-", 1)
|
||||
framework = parts[0]
|
||||
os_label = parts[1].split("-")[0] if len(parts) > 1 else ""
|
||||
return f"{framework} ({os_label})" if os_label else framework
|
||||
|
||||
# Python convention: test-results-<provider>
|
||||
raw = directory_name.replace("test-results-", "")
|
||||
known = {
|
||||
"openai": "OpenAI",
|
||||
@@ -102,11 +116,21 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]:
|
||||
# it appends the class name, e.g.:
|
||||
# "packages.foundry.tests.foundry.test_foundry_embedding_client.TestFoundryEmbeddingIntegration"
|
||||
# We want the file-level module: "test_foundry_embedding_client"
|
||||
#
|
||||
# xunit (dotnet) writes classname as the full C# type, e.g.:
|
||||
# "OpenAIChatCompletion.IntegrationTests.ChatCompletionTests"
|
||||
# We want the project prefix: "OpenAIChatCompletion"
|
||||
if classname:
|
||||
parts = classname.rsplit(".", 2)
|
||||
# If the last segment starts with uppercase it's a class name — take the one before it
|
||||
if len(parts) >= 2 and parts[-1][0:1].isupper():
|
||||
module = parts[-2]
|
||||
# For dotnet: if the penultimate part is "IntegrationTests" or "UnitTests",
|
||||
# use the part before that (the project name) instead
|
||||
if parts[-2] in ("IntegrationTests", "UnitTests") and len(parts) >= 3:
|
||||
# parts[0] may contain dots — take the last segment of it
|
||||
module = parts[0].rsplit(".", 1)[-1]
|
||||
else:
|
||||
module = parts[-2]
|
||||
else:
|
||||
module = parts[-1]
|
||||
else:
|
||||
@@ -148,28 +172,61 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]:
|
||||
"""Discover JUnit XML test result files in artifact subdirectories.
|
||||
|
||||
Handles two directory layouts:
|
||||
- **Python (pytest):** ``test-results-<provider>/pytest.xml``
|
||||
- **Dotnet (xunit):** ``dotnet-test-results-<tfm>-<os>/*.junit``
|
||||
|
||||
Returns:
|
||||
List of ``(directory_name, xml_path)`` tuples.
|
||||
"""
|
||||
xml_files: list[tuple[str, Path]] = []
|
||||
if not reports_dir.is_dir():
|
||||
return xml_files
|
||||
|
||||
for subdir in sorted(reports_dir.iterdir()):
|
||||
if not subdir.is_dir():
|
||||
continue
|
||||
|
||||
# Python layout: single pytest.xml per artifact
|
||||
pytest_xml = subdir / "pytest.xml"
|
||||
if pytest_xml.exists():
|
||||
xml_files.append((subdir.name, pytest_xml))
|
||||
continue
|
||||
|
||||
# Dotnet layout: multiple *.junit files per artifact
|
||||
junit_files = sorted(subdir.rglob("*.junit"))
|
||||
for jf in junit_files:
|
||||
xml_files.append((subdir.name, jf))
|
||||
|
||||
# Fallback: any .xml file that looks like JUnit (not .trx, not cobertura)
|
||||
if not junit_files:
|
||||
for xf in sorted(subdir.rglob("*.xml")):
|
||||
if xf.suffix == ".xml" and not xf.name.endswith(".cobertura.xml"):
|
||||
xml_files.append((subdir.name, xf))
|
||||
|
||||
return xml_files
|
||||
|
||||
|
||||
def load_current_run(reports_dir: Path) -> dict[str, Any]:
|
||||
"""Load per-provider JUnit XML reports from the current CI run and merge.
|
||||
|
||||
Supports both pytest (Python) and xunit v3 (dotnet) JUnit XML formats.
|
||||
|
||||
Args:
|
||||
reports_dir: Directory containing ``test-results-<provider>/`` subdirs.
|
||||
reports_dir: Directory containing artifact subdirectories with XML reports.
|
||||
|
||||
Returns:
|
||||
Merged run dict with ``timestamp``, ``summary``, ``results``.
|
||||
"""
|
||||
combined_results: dict[str, dict[str, str]] = {} # nodeid → {status, provider}
|
||||
|
||||
# actions/download-artifact creates: reports_dir/test-results-openai/pytest.xml
|
||||
xml_files: list[tuple[str, Path]] = []
|
||||
if reports_dir.is_dir():
|
||||
for subdir in sorted(reports_dir.iterdir()):
|
||||
if subdir.is_dir():
|
||||
xml_file = subdir / "pytest.xml"
|
||||
if xml_file.exists():
|
||||
xml_files.append((subdir.name, xml_file))
|
||||
xml_files = _discover_xml_files(reports_dir)
|
||||
|
||||
if not xml_files:
|
||||
print(f"Warning: No pytest.xml files found in {reports_dir}")
|
||||
print(f"Warning: No JUnit XML files found in {reports_dir}")
|
||||
return {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"summary": {
|
||||
@@ -181,19 +238,42 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
|
||||
"results": {},
|
||||
}
|
||||
|
||||
# Dotnet tests always run under multiple frameworks, so we always
|
||||
# qualify their keys with the provider to ensure deterministic,
|
||||
# stable keys across runs regardless of file parse order.
|
||||
is_dotnet = any(d.startswith("dotnet-test-results-") for d, _ in xml_files)
|
||||
|
||||
for dir_name, xml_file in xml_files:
|
||||
print(f" Loading: {xml_file}")
|
||||
provider = _derive_provider(dir_name)
|
||||
tests = _parse_junit_xml(xml_file)
|
||||
for test in tests:
|
||||
combined_results[test["nodeid"]] = {
|
||||
raw_id = test["nodeid"]
|
||||
key = f"{provider}::{raw_id}" if is_dotnet else raw_id
|
||||
|
||||
combined_results[key] = {
|
||||
"status": test["status"],
|
||||
"provider": provider,
|
||||
"module": test.get("module", ""),
|
||||
}
|
||||
|
||||
# Build summary counts using mutually exclusive status buckets.
|
||||
# Errors are folded into the failed count for display purposes.
|
||||
# Build per-provider summary counts so the report can show one row per
|
||||
# framework (dotnet) or per provider (Python).
|
||||
provider_counts: dict[str, dict[str, int]] = {}
|
||||
for r in combined_results.values():
|
||||
prov = r.get("provider", "Unknown")
|
||||
if prov not in provider_counts:
|
||||
provider_counts[prov] = {"total": 0, "passed": 0, "failed": 0, "skipped": 0}
|
||||
provider_counts[prov]["total"] += 1
|
||||
st = r["status"]
|
||||
if st == "passed":
|
||||
provider_counts[prov]["passed"] += 1
|
||||
elif st in ("failed", "error"):
|
||||
provider_counts[prov]["failed"] += 1
|
||||
elif st == "skipped":
|
||||
provider_counts[prov]["skipped"] += 1
|
||||
|
||||
# Overall summary (sum across all providers).
|
||||
statuses = [r["status"] for r in combined_results.values()]
|
||||
summary = {
|
||||
"total": len(statuses),
|
||||
@@ -205,6 +285,7 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
|
||||
return {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"summary": summary,
|
||||
"provider_summaries": provider_counts,
|
||||
"results": combined_results,
|
||||
}
|
||||
|
||||
@@ -253,7 +334,29 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
|
||||
"",
|
||||
]
|
||||
|
||||
# --- Overall status table (most recent first) ---
|
||||
# Detect whether this is a dotnet report (provider-qualified keys).
|
||||
is_dotnet = False
|
||||
for run in runs:
|
||||
provider_sums = run.get("provider_summaries", {})
|
||||
if any(p.startswith("net") for p in provider_sums):
|
||||
is_dotnet = True
|
||||
break
|
||||
|
||||
if is_dotnet:
|
||||
_generate_dotnet_report(lines, runs)
|
||||
else:
|
||||
_generate_python_report(lines, runs)
|
||||
|
||||
lines.append("")
|
||||
lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _generate_python_report(lines: list[str], runs: list[dict[str, Any]]) -> None:
|
||||
"""Generate the original single-table Python report format."""
|
||||
# --- Overall status table ---
|
||||
lines.append("## Overall Status (Last 5 Runs)")
|
||||
lines.append("")
|
||||
lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
|
||||
@@ -276,27 +379,91 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
|
||||
|
||||
lines.append("")
|
||||
|
||||
# --- Per-test results table ---
|
||||
lines.append("## Per-Test Results")
|
||||
lines.append("")
|
||||
# --- Single per-test results table ---
|
||||
_generate_per_test_table(lines, runs, "## Per-Test Results")
|
||||
|
||||
# Collect all test nodeids, providers, and modules across all runs
|
||||
all_tests: dict[str, str] = {} # nodeid → provider (from most recent run)
|
||||
all_modules: dict[str, str] = {} # nodeid → module (from most recent run)
|
||||
|
||||
def _generate_dotnet_report(lines: list[str], runs: list[dict[str, Any]]) -> None:
    """Append one report section per provider (e.g. net10.0, net472) to ``lines``.

    Each section consists of a ``## <provider>`` heading, a summary table with
    one row per run (newest first, padded with ``N/A`` rows up to
    ``MAX_HISTORY``), and a per-test table restricted to that provider.

    Args:
        lines: Output buffer of markdown lines; mutated in place.
        runs: Run dicts, each read for ``timestamp`` and ``provider_summaries``.
    """
    # Union of every provider name seen in any run; sorted for stable ordering.
    seen: set[str] = {
        name
        for entry in runs
        for name in entry.get("provider_summaries", {})
    }

    for name in sorted(seen):
        lines.extend(
            (
                f"## {name}",
                "",
                # --- Per-provider summary table ---
                "| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |",
                "|-----|-------|-----------|-----------|------------|",
            )
        )

        for entry in reversed(runs):
            counts = entry.get("provider_summaries", {}).get(name, {})
            total = counts.get("total", 0)
            label = _format_run_label(entry["timestamp"])
            if total != 0:
                passed = counts.get("passed", 0)
                failed = counts.get("failed", 0)
                skipped = counts.get("skipped", 0)
                row = (
                    f"| {label} | {total} "
                    f"| {passed}/{total} | {failed}/{total} | {skipped}/{total} |"
                )
            else:
                # This provider has no results recorded for this run.
                row = f"| {label} | N/A | N/A | N/A | N/A |"
            lines.append(row)

        # Pad the table so it always shows MAX_HISTORY rows.
        lines.extend(["| N/A | N/A | N/A | N/A | N/A |"] * (MAX_HISTORY - len(runs)))
        lines.append("")

        # --- Per-test table filtered to this provider ---
        _generate_per_test_table(lines, runs, heading=None, provider_filter=name)
|
||||
|
||||
|
||||
def _generate_per_test_table(
|
||||
lines: list[str],
|
||||
runs: list[dict[str, Any]],
|
||||
heading: str | None = None,
|
||||
provider_filter: str | None = None,
|
||||
) -> None:
|
||||
"""Emit a per-test trend table, optionally filtered to a single provider."""
|
||||
if heading:
|
||||
lines.append(heading)
|
||||
lines.append("")
|
||||
|
||||
# Collect all test nodeids (and metadata) across all runs
|
||||
all_tests: dict[str, str] = {} # nodeid → provider
|
||||
all_modules: dict[str, str] = {} # nodeid → module
|
||||
for run in runs:
|
||||
for nodeid, info in run.get("results", {}).items():
|
||||
provider = info.get("provider", "Unknown") if isinstance(info, dict) else "Unknown"
|
||||
module = info.get("module", "") if isinstance(info, dict) else ""
|
||||
all_tests[nodeid] = provider
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
prov = info.get("provider", "Unknown")
|
||||
if provider_filter and prov != provider_filter:
|
||||
continue
|
||||
module = info.get("module", "")
|
||||
all_tests[nodeid] = prov
|
||||
all_modules[nodeid] = module
|
||||
|
||||
if not all_tests:
|
||||
lines.append("*No test results available.*")
|
||||
return "\n".join(lines)
|
||||
lines.append("")
|
||||
return
|
||||
|
||||
# Build header (most recent run first)
|
||||
header = "| Test | File | Provider |"
|
||||
separator = "|------|------|----------|"
|
||||
# Build header
|
||||
if provider_filter:
|
||||
header = "| Test | File |"
|
||||
separator = "|------|------|"
|
||||
else:
|
||||
header = "| Test | File | Provider |"
|
||||
separator = "|------|------|----------|"
|
||||
for run in reversed(runs):
|
||||
label = _format_run_label(run["timestamp"])
|
||||
header += f" {label} |"
|
||||
@@ -308,12 +475,15 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
|
||||
lines.append(header)
|
||||
lines.append(separator)
|
||||
|
||||
# Sort by provider then test name
|
||||
for nodeid in sorted(all_tests, key=lambda n: (all_tests[n], n)):
|
||||
provider = all_tests[nodeid]
|
||||
# Sort by module then test name
|
||||
for nodeid in sorted(all_tests, key=lambda n: (all_modules.get(n, ""), n)):
|
||||
module = all_modules.get(nodeid, "")
|
||||
short = _short_name(nodeid)
|
||||
row = f"| `{short}` | `{module}` | {provider} |"
|
||||
if provider_filter:
|
||||
row = f"| `{short}` | `{module}` |"
|
||||
else:
|
||||
provider = all_tests[nodeid]
|
||||
row = f"| `{short}` | `{module}` | {provider} |"
|
||||
|
||||
for run in reversed(runs):
|
||||
result = run.get("results", {}).get(nodeid)
|
||||
@@ -330,10 +500,6 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
|
||||
lines.append(row)
|
||||
|
||||
lines.append("")
|
||||
lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user