mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Flaky test report (#5342)
* Add flaky test trend reporting to CI workflows

  Parse JUnit XML (pytest.xml) from each integration test job and aggregate results into a markdown trend report showing per-test pass/fail/skip status across the last 5 runs.

  Changes:
  - Add python/scripts/flaky_report/ package (JUnit XML parser + trend report generator following the sample_validation pattern)
  - Add upload-artifact steps to all 6 integration test jobs in both python-merge-tests.yml and python-integration-tests.yml
  - Add python-flaky-test-report aggregation job with history caching
  - Add --junitxml=pytest.xml to integration-tests.yml jobs (already present in merge-tests.yml)
  - Fix Cosmos job --junitxml path (use absolute path since uv run --directory changes cwd)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Fix flaky report: handle missing test results gracefully

  - Guard against missing reports directory in load_current_run()
  - Only run report job when at least one integration test job completed (skip when all jobs are skipped, e.g. on pull_request events)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address PR review: fix provider names and if-expression precedence

  - Use explicit provider name mapping in _derive_provider() so OpenAI renders correctly instead of 'Openai'
  - Fix operator precedence in workflow if-expressions by wrapping success/failure checks in parentheses

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Add File column and xfail detection to flaky test report

  - Add File column showing module name (e.g., test_openai_chat_client) to disambiguate tests with the same function name across files
  - Detect pytest xfail tests in JUnit XML (type=pytest.xfail) and show them with a distinct warning emoji instead of skip emoji
  - Update legend to include xfail explanation

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Add Foundry embedding env vars to merge-tests workflow

  Sync the Foundry integration job in python-merge-tests.yml with python-integration-tests.yml by adding FOUNDRY_MODELS_ENDPOINT, FOUNDRY_MODELS_API_KEY, FOUNDRY_EMBEDDING_MODEL, and FOUNDRY_IMAGE_EMBEDDING_MODEL. Once the repo variables/secrets are configured, the embedding integration test will run in CI.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Fix File column showing class name instead of module name

  When a test is inside a class, pytest writes the classname as e.g. 'pkg.test_file.TestClass'. The previous rsplit logic extracted 'TestClass' instead of 'test_file'. Now detect uppercase-starting segments as class names and use the preceding segment instead.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address PR review: UTC timestamps, XML error handling, summary fix, docstring

  - Use datetime.now(timezone.utc) for accurate UTC timestamps
  - Catch ET.ParseError per-file so corrupt XML doesn't crash the report
  - Remove separate 'error' key from summary (errors folded into 'failed')
  - Fix _short_name docstring to show actual dotted classname::name format

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
d75f874d78
commit
3f23e1dfbf
@@ -87,6 +87,14 @@ jobs:
|
||||
-n logical --dist worksteal
|
||||
--timeout=120 --session-timeout=900 --timeout_method thread
|
||||
--retries 2 --retry-delay 5
|
||||
--junitxml=pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-openai
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Azure OpenAI integration tests
|
||||
python-tests-azure-openai:
|
||||
@@ -130,6 +138,14 @@ jobs:
|
||||
-n logical --dist worksteal
|
||||
--timeout=120 --session-timeout=900 --timeout_method thread
|
||||
--retries 2 --retry-delay 5
|
||||
--junitxml=pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-azure-openai
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Misc integration tests (Anthropic, Hyperlight, Ollama, MCP)
|
||||
python-tests-misc-integration:
|
||||
@@ -173,6 +189,14 @@ jobs:
|
||||
-n logical --dist worksteal
|
||||
--timeout=120 --session-timeout=900 --timeout_method thread
|
||||
--retries 2 --retry-delay 30
|
||||
--junitxml=pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-misc
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
- name: Stop local MCP server
|
||||
if: always()
|
||||
shell: bash
|
||||
@@ -249,6 +273,14 @@ jobs:
|
||||
-x
|
||||
--timeout=360 --session-timeout=900 --timeout_method thread
|
||||
--retries 2 --retry-delay 5
|
||||
--junitxml=pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-functions
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Foundry integration tests
|
||||
python-tests-foundry:
|
||||
@@ -295,6 +327,14 @@ jobs:
|
||||
-n logical --dist worksteal
|
||||
--timeout=120 --session-timeout=900 --timeout_method thread
|
||||
--retries 2 --retry-delay 5
|
||||
--junitxml=pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-foundry
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Azure Cosmos integration tests
|
||||
python-tests-cosmos:
|
||||
@@ -339,7 +379,80 @@ jobs:
|
||||
echo "Cosmos DB emulator did not become ready in time." >&2
|
||||
exit 1
|
||||
- name: Test with pytest (Cosmos integration)
|
||||
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
|
||||
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-cosmos
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Flaky test trend report (aggregates per-job JUnit XML results).
# Collects the `test-results-*` artifacts uploaded by the integration test
# jobs, merges them with the cached history file, and publishes a markdown
# trend report to the job summary and as an artifact.
python-flaky-test-report:
  name: Flaky Test Report
  # Run even when upstream jobs failed, but only if at least one of them
  # actually finished with success or failure (i.e. not when every job was
  # skipped). The parentheses keep the `||` grouped under `always() &&`.
  if: >
    always() &&
    (contains(join(needs.*.result, ','), 'success') ||
    contains(join(needs.*.result, ','), 'failure'))
  needs:
    - python-tests-openai
    - python-tests-azure-openai
    - python-tests-misc-integration
    - python-tests-functions
    - python-tests-foundry
    - python-tests-cosmos
  runs-on: ubuntu-latest
  defaults:
    run:
      # Applies to `run:` steps only; paths given to `uses:` steps below are
      # resolved from the workspace root instead.
      working-directory: python
  steps:
    - uses: actions/checkout@v6
      with:
        ref: ${{ inputs.checkout-ref }}
        persist-credentials: false
    - name: Set up python and install the project
      uses: ./.github/actions/python-setup
      with:
        python-version: ${{ env.UV_PYTHON }}
        os: ${{ runner.os }}
    - name: Download all test results from current run
      uses: actions/download-artifact@v4
      with:
        pattern: test-results-*
        # Workspace-relative; the report step reaches it as ../test-results/.
        path: test-results/
    - name: Restore flaky report history cache
      uses: actions/cache/restore@v4
      with:
        path: python/flaky-report-history.json
        # The key is unique per run, so the restore normally falls through
        # to the prefix in restore-keys and picks up an earlier run's history.
        key: flaky-report-history-integration-${{ github.run_id }}
        restore-keys: |
          flaky-report-history-integration-
    - name: Generate trend report
      # Arguments: results directory, history file, output report.
      run: >
        uv run python scripts/flaky_report/aggregate.py
        ../test-results/
        flaky-report-history.json
        flaky-test-report.md
    - name: Post to Job Summary
      if: always()
      # This step runs even when report generation failed, so check that the
      # report exists instead of letting `cat` raise a second, misleading error.
      run: |
        if [ -f flaky-test-report.md ]; then
          cat flaky-test-report.md >> "$GITHUB_STEP_SUMMARY"
        else
          echo "Flaky test report was not generated." >> "$GITHUB_STEP_SUMMARY"
        fi
    - name: Save flaky report history cache
      if: always()
      uses: actions/cache/save@v4
      with:
        path: python/flaky-report-history.json
        key: flaky-report-history-integration-${{ github.run_id }}
    - name: Upload unified trend report
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: flaky-test-report
        path: |
          python/flaky-test-report.md
          python/flaky-report-history.json
|
||||
|
||||
python-integration-tests-check:
|
||||
if: always()
|
||||
|
||||
@@ -181,6 +181,13 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: OpenAI integration test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-openai
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Azure OpenAI integration tests
|
||||
python-tests-azure-openai:
|
||||
@@ -244,6 +251,13 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: Azure OpenAI integration test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-azure-openai
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Misc integration tests (Anthropic, Ollama, MCP)
|
||||
python-tests-misc-integration:
|
||||
@@ -321,6 +335,13 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: Misc integration test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-misc
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Azure Functions + Durable Task integration tests
|
||||
python-tests-functions:
|
||||
@@ -392,6 +413,13 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: Functions integration test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-functions
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
python-tests-foundry:
|
||||
name: Python Integration Tests - Foundry
|
||||
@@ -409,6 +437,10 @@ jobs:
|
||||
FOUNDRY_MODEL: ${{ vars.FOUNDRY_MODEL }}
|
||||
FOUNDRY_AGENT_NAME: ${{ vars.FOUNDRY_AGENT_NAME }}
|
||||
FOUNDRY_AGENT_VERSION: ${{ vars.FOUNDRY_AGENT_VERSION }}
|
||||
FOUNDRY_MODELS_ENDPOINT: ${{ vars.FOUNDRY_MODELS_ENDPOINT || '' }}
|
||||
FOUNDRY_MODELS_API_KEY: ${{ secrets.FOUNDRY_MODELS_API_KEY || '' }}
|
||||
FOUNDRY_EMBEDDING_MODEL: ${{ vars.FOUNDRY_EMBEDDING_MODEL || '' }}
|
||||
FOUNDRY_IMAGE_EMBEDDING_MODEL: ${{ vars.FOUNDRY_IMAGE_EMBEDDING_MODEL || '' }}
|
||||
LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
|
||||
defaults:
|
||||
run:
|
||||
@@ -448,6 +480,13 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: Test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-foundry
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# TODO: Add python-tests-lab
|
||||
|
||||
@@ -497,7 +536,7 @@ jobs:
|
||||
echo "Cosmos DB emulator did not become ready in time." >&2
|
||||
exit 1
|
||||
- name: Test with pytest (Cosmos integration)
|
||||
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=pytest.xml
|
||||
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml
|
||||
working-directory: ./python
|
||||
- name: Surface failing tests
|
||||
if: always()
|
||||
@@ -508,6 +547,76 @@ jobs:
|
||||
display-options: fEX
|
||||
fail-on-empty: false
|
||||
title: Cosmos integration test results
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results-cosmos
|
||||
path: ./python/pytest.xml
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Flaky test trend report (aggregates per-job JUnit XML results).
# Collects the `test-results-*` artifacts uploaded by the integration test
# jobs, merges them with the cached history file, and publishes a markdown
# trend report to the job summary and as an artifact.
python-flaky-test-report:
  name: Flaky Test Report
  # Run even when upstream jobs failed, but only if at least one of them
  # actually finished with success or failure (i.e. not when every job was
  # skipped). The parentheses keep the `||` grouped under `always() &&`.
  if: >
    always() &&
    (contains(join(needs.*.result, ','), 'success') ||
    contains(join(needs.*.result, ','), 'failure'))
  needs:
    - python-tests-openai
    - python-tests-azure-openai
    - python-tests-misc-integration
    - python-tests-functions
    - python-tests-foundry
    - python-tests-cosmos
  runs-on: ubuntu-latest
  defaults:
    run:
      # Applies to `run:` steps only; paths given to `uses:` steps below are
      # resolved from the workspace root instead.
      working-directory: python
  steps:
    - uses: actions/checkout@v6
      with:
        # No step in this job uses git credentials after checkout, so do not
        # leave the token in the local git config.
        persist-credentials: false
    - name: Set up python and install the project
      uses: ./.github/actions/python-setup
      with:
        python-version: ${{ env.UV_PYTHON }}
        os: ${{ runner.os }}
    - name: Download all test results from current run
      uses: actions/download-artifact@v4
      with:
        pattern: test-results-*
        # Workspace-relative; the report step reaches it as ../test-results/.
        path: test-results/
    - name: Restore flaky report history cache
      uses: actions/cache/restore@v4
      with:
        path: python/flaky-report-history.json
        # The key is unique per run, so the restore normally falls through
        # to the prefix in restore-keys and picks up an earlier run's history.
        key: flaky-report-history-merge-${{ github.run_id }}
        restore-keys: |
          flaky-report-history-merge-
    - name: Generate trend report
      # Arguments: results directory, history file, output report.
      run: >
        uv run python scripts/flaky_report/aggregate.py
        ../test-results/
        flaky-report-history.json
        flaky-test-report.md
    - name: Post to Job Summary
      if: always()
      # This step runs even when report generation failed, so check that the
      # report exists instead of letting `cat` raise a second, misleading error.
      run: |
        if [ -f flaky-test-report.md ]; then
          cat flaky-test-report.md >> "$GITHUB_STEP_SUMMARY"
        else
          echo "Flaky test report was not generated." >> "$GITHUB_STEP_SUMMARY"
        fi
    - name: Save flaky report history cache
      if: always()
      uses: actions/cache/save@v4
      with:
        path: python/flaky-report-history.json
        key: flaky-report-history-merge-${{ github.run_id }}
    - name: Upload unified trend report
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: flaky-test-report
        path: |
          python/flaky-test-report.md
          python/flaky-report-history.json
|
||||
|
||||
python-integration-tests-check:
|
||||
if: always()
|
||||
|
||||
Reference in New Issue
Block a user