From 4b533608b607a71e6383a962b70fee868ba1f50a Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Tue, 24 Mar 2026 18:21:32 -0700
Subject: [PATCH] Python: Update sample validation scripts (#4870)

* Update sample validation scripts

* Adjust prompt

* Update autogen-migration samples

* Add fix suggestion

* Split jobs

* Add .env

* Create trend report

* Add timestamp

* Add more env vars

* Comments

* force node24

* force node24

* force node22
---
 .../sample-validation-setup/action.yml        |   4 +-
 .../workflows/python-sample-validation.yml    | 528 +++++++++++++++++-
 .../chat_client/built_in_chat_clients.py      |  16 +-
 .../01_round_robin_group_chat.py              |  26 +-
 .../orchestrations/02_selector_group_chat.py  |  22 +-
 .../orchestrations/03_swarm.py                |  21 +-
 .../orchestrations/04_magentic_one.py         |  21 +-
 .../single_agent/01_basic_assistant_agent.py  |  19 +-
 .../02_assistant_agent_with_tool.py           |  20 +-
 .../03_assistant_agent_thread_and_stream.py   |  19 +-
 .../single_agent/04_agent_as_tool.py          |  20 +-
 python/scripts/sample_validation/README.md    |  13 +-
 python/scripts/sample_validation/__main__.py  |  10 +-
 python/scripts/sample_validation/aggregate.py | 224 ++++++++
 .../create_dynamic_workflow_executor.py       | 104 ++--
 python/scripts/sample_validation/discovery.py |  24 +-
 python/scripts/sample_validation/models.py    |  20 +-
 python/scripts/sample_validation/report.py    |  16 +-
 ...un_dynamic_validation_workflow_executor.py |   3 +-
 19 files changed, 928 insertions(+), 202 deletions(-)
 create mode 100644 python/scripts/sample_validation/aggregate.py

diff --git a/.github/actions/sample-validation-setup/action.yml b/.github/actions/sample-validation-setup/action.yml
index 3736348579..2920aaa5bd 100644
--- a/.github/actions/sample-validation-setup/action.yml
+++ b/.github/actions/sample-validation-setup/action.yml
@@ -24,7 +24,9 @@ runs:
   using: "composite"
   steps:
     - name: Set up Node.js environment
-      uses: actions/setup-node@v4
+      uses: actions/setup-node@v6
+      with:
+        node-version: 22
 
     - name: Install Copilot CLI
       shell: bash
diff --git a/.github/workflows/python-sample-validation.yml b/.github/workflows/python-sample-validation.yml
index 4a14e6b41b..90fecec6a2 100644
--- a/.github/workflows/python-sample-validation.yml
+++ b/.github/workflows/python-sample-validation.yml
@@ -41,6 +41,13 @@ jobs:
           azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
           os: ${{ runner.os }}
 
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+
       - name: Run sample validation
         run: |
           cd scripts && uv run python -m sample_validation --subdir 01-get-started --save-report --report-name 01-get-started
@@ -50,7 +57,7 @@ jobs:
         if: always()
         with:
           name: validation-report-01-get-started
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
 
   validate-02-agents:
     name: Validate 02-agents
@@ -64,10 +71,13 @@ jobs:
       AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
       AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
       AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
       # OpenAI configuration
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      # GitHub MCP
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       # Observability
       ENABLE_INSTRUMENTATION: "true"
     defaults:
@@ -84,16 +94,420 @@ jobs:
           azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
           os: ${{ runner.os }}
 
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME=$AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+          echo "GITHUB_PAT=$GITHUB_PAT" >> .env
+
       - name: Run sample validation
         run: |
-          cd scripts && uv run python -m sample_validation --subdir 02-agents --save-report --report-name 02-agents
+          cd scripts && uv run python -m sample_validation --subdir 02-agents --exclude providers --save-report --report-name 02-agents
 
       - name: Upload validation report
         uses: actions/upload-artifact@v7
         if: always()
         with:
           name: validation-report-02-agents
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-openai:
+    name: Validate 02-agents/providers/openai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
+      OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
+      OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/openai --save-report --report-name 02-agents-openai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-openai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-openai:
+    name: Validate 02-agents/providers/azure_openai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
+      AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_openai --save-report --report-name 02-agents-azure-openai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-azure-openai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-ai:
+    name: Validate 02-agents/providers/azure_ai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
+      BING_CONNECTION_ID: ${{ secrets.BING_CONNECTION_ID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME=$AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME=$AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "BING_CONNECTION_ID=$BING_CONNECTION_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_ai --save-report --report-name 02-agents-azure-ai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-azure-ai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-ai-agent:
+    name: Validate 02-agents/providers/azure_ai_agent
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_ai_agent --save-report --report-name 02-agents-azure-ai-agent
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-azure-ai-agent
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-anthropic:
+    name: Validate 02-agents/providers/anthropic
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      ANTHROPIC_CHAT_MODEL_ID: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+          echo "ANTHROPIC_CHAT_MODEL_ID=$ANTHROPIC_CHAT_MODEL_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/anthropic --save-report --report-name 02-agents-anthropic
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-anthropic
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-github-copilot:
+    name: Validate 02-agents/providers/github_copilot
+    runs-on: ubuntu-latest
+    environment: integration
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/github_copilot --save-report --report-name 02-agents-github-copilot
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-github-copilot
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-amazon:
+    name: Validate 02-agents/providers/amazon
+    if: false  # Temporarily disabled - requires AWS credentials
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      BEDROCK_CHAT_MODEL_ID: ${{ vars.BEDROCK__CHATMODELID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/amazon --save-report --report-name 02-agents-amazon
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-amazon
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-ollama:
+    name: Validate 02-agents/providers/ollama
+    if: false  # Temporarily disabled - requires local Ollama server
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      OLLAMA_MODEL: ${{ vars.OLLAMA__MODEL }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/ollama --save-report --report-name 02-agents-ollama
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-ollama
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-foundry-local:
+    name: Validate 02-agents/providers/foundry_local
+    if: false  # Temporarily disabled - requires local Foundry setup
+    runs-on: ubuntu-latest
+    environment: integration
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/foundry_local --save-report --report-name 02-agents-foundry-local
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-foundry-local
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-copilotstudio:
+    name: Validate 02-agents/providers/copilotstudio
+    if: false  # Temporarily disabled - requires Copilot Studio setup
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      COPILOTSTUDIOAGENT__ENVIRONMENTID: ${{ secrets.COPILOTSTUDIOAGENT__ENVIRONMENTID }}
+      COPILOTSTUDIOAGENT__SCHEMANAME: ${{ secrets.COPILOTSTUDIOAGENT__SCHEMANAME }}
+      COPILOTSTUDIOAGENT__TENANTID: ${{ secrets.COPILOTSTUDIOAGENT__TENANTID }}
+      COPILOTSTUDIOAGENT__AGENTAPPID: ${{ secrets.COPILOTSTUDIOAGENT__AGENTAPPID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "COPILOTSTUDIOAGENT__ENVIRONMENTID=$COPILOTSTUDIOAGENT__ENVIRONMENTID" >> .env
+          echo "COPILOTSTUDIOAGENT__SCHEMANAME=$COPILOTSTUDIOAGENT__SCHEMANAME" >> .env
+          echo "COPILOTSTUDIOAGENT__TENANTID=$COPILOTSTUDIOAGENT__TENANTID" >> .env
+          echo "COPILOTSTUDIOAGENT__AGENTAPPID=$COPILOTSTUDIOAGENT__AGENTAPPID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/copilotstudio --save-report --report-name 02-agents-copilotstudio
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-copilotstudio
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-custom:
+    name: Validate 02-agents/providers/custom
+    runs-on: ubuntu-latest
+    environment: integration
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/custom --save-report --report-name 02-agents-custom
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-report-02-agents-custom
+          path: python/samples/sample_validation/reports/
 
   validate-03-workflows:
     name: Validate 03-workflows
@@ -121,6 +535,14 @@ jobs:
           azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
           os: ${{ runner.os }}
 
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+
       - name: Run sample validation
         run: |
           cd scripts && uv run python -m sample_validation --subdir 03-workflows --save-report --report-name 03-workflows
@@ -130,7 +552,7 @@ jobs:
         if: always()
         with:
           name: validation-report-03-workflows
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
 
   validate-04-hosting:
     name: Validate 04-hosting
@@ -169,7 +591,7 @@ jobs:
         if: always()
         with:
           name: validation-report-04-hosting
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
 
   validate-05-end-to-end:
     name: Validate 05-end-to-end
@@ -213,7 +635,7 @@ jobs:
         if: always()
         with:
           name: validation-report-05-end-to-end
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
 
   validate-autogen-migration:
     name: Validate autogen-migration
@@ -244,6 +666,16 @@ jobs:
           azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
           os: ${{ runner.os }}
 
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+
       - name: Run sample validation
         run: |
           cd scripts && uv run python -m sample_validation --subdir autogen-migration --save-report --report-name autogen-migration
@@ -253,7 +685,7 @@ jobs:
         if: always()
         with:
           name: validation-report-autogen-migration
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
 
   validate-semantic-kernel-migration:
     name: Validate semantic-kernel-migration
@@ -290,6 +722,21 @@ jobs:
           azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
           os: ${{ runner.os }}
 
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+          echo "COPILOTSTUDIOAGENT__ENVIRONMENTID=$COPILOTSTUDIOAGENT__ENVIRONMENTID" >> .env
+          echo "COPILOTSTUDIOAGENT__SCHEMANAME=$COPILOTSTUDIOAGENT__SCHEMANAME" >> .env
+          echo "COPILOTSTUDIOAGENT__TENANTID=$COPILOTSTUDIOAGENT__TENANTID" >> .env
+          echo "COPILOTSTUDIOAGENT__AGENTAPPID=$COPILOTSTUDIOAGENT__AGENTAPPID" >> .env
+
       - name: Run sample validation
         run: |
           cd scripts && uv run python -m sample_validation --subdir semantic-kernel-migration --save-report --report-name semantic-kernel-migration
@@ -299,4 +746,69 @@ jobs:
         if: always()
         with:
           name: validation-report-semantic-kernel-migration
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
+
+  aggregate-results:
+    name: Aggregate Results
+    runs-on: ubuntu-latest
+    if: always()
+    needs:
+      - validate-01-get-started
+      - validate-02-agents
+      - validate-02-agents-openai
+      - validate-02-agents-azure-openai
+      - validate-02-agents-azure-ai
+      - validate-02-agents-azure-ai-agent
+      - validate-02-agents-anthropic
+      - validate-02-agents-github-copilot
+      - validate-02-agents-amazon
+      - validate-02-agents-ollama
+      - validate-02-agents-foundry-local
+      - validate-02-agents-copilotstudio
+      - validate-02-agents-custom
+      - validate-03-workflows
+      - validate-04-hosting
+      - validate-05-end-to-end
+      - validate-autogen-migration
+      - validate-semantic-kernel-migration
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Download all validation reports
+        uses: actions/download-artifact@v7
+        with:
+          pattern: validation-report-*
+          path: reports/
+          merge-multiple: true
+
+      - name: Restore validation history
+        id: cache-restore
+        uses: actions/cache/restore@v4
+        with:
+          path: validation-history/
+          key: validation-history-${{ github.run_id }}-${{ github.run_attempt }}  # never an exact hit on a fresh attempt; falls back to the newest history below
+          restore-keys: |
+            validation-history-
+
+      - name: Aggregate results and generate trend report
+        run: |
+          python3 python/scripts/sample_validation/aggregate.py \
+            reports/ \
+            validation-history/history.json \
+            trend-report.md
+
+      - name: Write trend report to job summary
+        run: cat trend-report.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Save validation history
+        uses: actions/cache/save@v4
+        with:
+          path: validation-history/
+          key: validation-history-${{ github.run_id }}-${{ github.run_attempt }}  # cache entries are immutable: include the attempt so re-runs can still save
+
+      - name: Upload trend report
+        uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: validation-trend-report
+          path: trend-report.md
diff --git a/python/samples/02-agents/chat_client/built_in_chat_clients.py b/python/samples/02-agents/chat_client/built_in_chat_clients.py
index 8560afcf4f..21d6a0f81a 100644
--- a/python/samples/02-agents/chat_client/built_in_chat_clients.py
+++ b/python/samples/02-agents/chat_client/built_in_chat_clients.py
@@ -5,7 +5,7 @@ import os
 from random import randint
 from typing import Annotated, Any, Literal
 
-from agent_framework import SupportsChatGetResponse, tool
+from agent_framework import Message, SupportsChatGetResponse, tool
 from agent_framework.azure import (
     AzureAIAgentClient,
     AzureOpenAIAssistantsClient,
@@ -117,35 +117,37 @@ async def main(client_name: ClientName = "openai_chat") -> None:
     client = get_client(client_name)
 
     # 1. Configure prompt and streaming mode.
-    message = "What's the weather in Amsterdam and in Paris?"
+    message = Message("user", text="What's the weather in Amsterdam and in Paris?")
     stream = os.getenv("STREAM", "false").lower() == "true"
     print(f"Client: {client_name}")
-    print(f"User: {message}")
+    print(f"User: {message.text}")
 
     # 2. Run with context-managed clients.
     if isinstance(client, OpenAIAssistantsClient | AzureOpenAIAssistantsClient | AzureAIAgentClient):
         async with client:
             if stream:
-                response_stream = client.get_response(message, stream=True, options={"tools": get_weather})
+                response_stream = client.get_response([message], stream=True, options={"tools": get_weather})
                 print("Assistant: ", end="")
                 async for chunk in response_stream:
                     if chunk.text:
                         print(chunk.text, end="")
                 print("")
             else:
-                print(f"Assistant: {await client.get_response(message, stream=False, options={'tools': get_weather})}")
+                print(
+                    f"Assistant: {await client.get_response([message], stream=False, options={'tools': get_weather})}"
+                )
         return
 
     # 3. Run with non-context-managed clients.
     if stream:
-        response_stream = client.get_response(message, stream=True, options={"tools": get_weather})
+        response_stream = client.get_response([message], stream=True, options={"tools": get_weather})
         print("Assistant: ", end="")
         async for chunk in response_stream:
             if chunk.text:
                 print(chunk.text, end="")
         print("")
     else:
-        print(f"Assistant: {await client.get_response(message, stream=False, options={'tools': get_weather})}")
+        print(f"Assistant: {await client.get_response([message], stream=False, options={'tools': get_weather})}")
 
 
 if __name__ == "__main__":
diff --git a/python/samples/autogen-migration/orchestrations/01_round_robin_group_chat.py b/python/samples/autogen-migration/orchestrations/01_round_robin_group_chat.py
index e5c6bd09f8..8b883a07b9 100644
--- a/python/samples/autogen-migration/orchestrations/01_round_robin_group_chat.py
+++ b/python/samples/autogen-migration/orchestrations/01_round_robin_group_chat.py
@@ -1,25 +1,15 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/01_round_robin_group_chat.py
-
 # Copyright (c) Microsoft. All rights reserved.
 """AutoGen RoundRobinGroupChat vs Agent Framework GroupChatBuilder/SequentialBuilder.
 
 Demonstrates sequential agent orchestration where agents take turns processing
 the task in a round-robin fashion.
 """
 
 import asyncio
 
 from agent_framework import Message
 from dotenv import load_dotenv
 
 # Load environment variables from .env file
 load_dotenv()
 
@@ -98,7 +88,7 @@ async def run_agent_framework() -> None:
     print("[Agent Framework] Sequential conversation:")
     async for event in workflow.run("Create a brief summary about electric vehicles", stream=True):
         if event.type == "output" and isinstance(event.data, list):
-            for message in event.data:
+            for message in event.data:  # type: ignore
                 if isinstance(message, Message) and message.role == "assistant" and message.text:
                     print(f"---------- {message.author_name} ----------")
                     print(message.text)
@@ -144,9 +134,7 @@ async def run_agent_framework_with_cycle() -> None:
         if last_message and "APPROVED" in last_message.text:
             await context.yield_output("Content approved.")
         else:
-            await context.send_message(
-                AgentExecutorRequest(messages=response.full_conversation, should_respond=True)
-            )
+            await context.send_message(AgentExecutorRequest(messages=response.full_conversation, should_respond=True))
 
     workflow = (
         WorkflowBuilder(start_executor=researcher)
diff --git a/python/samples/autogen-migration/orchestrations/02_selector_group_chat.py b/python/samples/autogen-migration/orchestrations/02_selector_group_chat.py
index 6f16e1dea9..485f3793e5 100644
--- a/python/samples/autogen-migration/orchestrations/02_selector_group_chat.py
+++ b/python/samples/autogen-migration/orchestrations/02_selector_group_chat.py
@@ -1,25 +1,17 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/02_selector_group_chat.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen SelectorGroupChat vs Agent Framework GroupChatBuilder.
 
-Demonstrates LLM-based speaker selection where an orchestrator decides
-which agent should speak next based on the conversation context.
-"""
 
 import asyncio
 
 from agent_framework import Message
 from dotenv import load_dotenv
 
+"""AutoGen SelectorGroupChat vs Agent Framework GroupChatBuilder.
+
+Demonstrates LLM-based speaker selection where an orchestrator decides
+which agent should speak next based on the conversation context.
+"""
+
 # Load environment variables from .env file
 load_dotenv()
 
@@ -113,7 +105,7 @@ async def run_agent_framework() -> None:
     print("[Agent Framework] Group chat conversation:")
     async for event in workflow.run("How do I connect to a PostgreSQL database using Python?", stream=True):
         if event.type == "output" and isinstance(event.data, list):
-            for message in event.data:
+            for message in event.data:  # type: ignore
                 if isinstance(message, Message) and message.role == "assistant" and message.text:
                     print(f"---------- {message.author_name} ----------")
                     print(message.text)
diff --git a/python/samples/autogen-migration/orchestrations/03_swarm.py b/python/samples/autogen-migration/orchestrations/03_swarm.py
index a178ffcffe..e2a8688b10 100644
--- a/python/samples/autogen-migration/orchestrations/03_swarm.py
+++ b/python/samples/autogen-migration/orchestrations/03_swarm.py
@@ -1,19 +1,4 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/03_swarm.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen Swarm pattern vs Agent Framework HandoffBuilder.
-
-Demonstrates agent handoff coordination where agents can transfer control
-to other specialized agents based on the task requirements.
-"""
 
 import asyncio
 from typing import Any
@@ -21,6 +6,12 @@ from typing import Any
 from agent_framework import AgentResponseUpdate, WorkflowEvent
 from dotenv import load_dotenv
 
+"""AutoGen Swarm pattern vs Agent Framework HandoffBuilder.
+
+Demonstrates agent handoff coordination where agents can transfer control
+to other specialized agents based on the task requirements.
+"""
+
 # Load environment variables from .env file
 load_dotenv()
 
diff --git a/python/samples/autogen-migration/orchestrations/04_magentic_one.py b/python/samples/autogen-migration/orchestrations/04_magentic_one.py
index b6728b0e46..58ec95e492 100644
--- a/python/samples/autogen-migration/orchestrations/04_magentic_one.py
+++ b/python/samples/autogen-migration/orchestrations/04_magentic_one.py
@@ -1,19 +1,4 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/04_magentic_one.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen MagenticOneGroupChat vs Agent Framework MagenticBuilder.
-
-Demonstrates orchestrated multi-agent workflows with a central coordinator
-managing specialized agents for complex tasks.
-"""
 
 import asyncio
 import json
@@ -27,6 +12,12 @@ from agent_framework import (
 from agent_framework.orchestrations import MagenticProgressLedger
 from dotenv import load_dotenv
 
+"""AutoGen MagenticOneGroupChat vs Agent Framework MagenticBuilder.
+
+Demonstrates orchestrated multi-agent workflows with a central coordinator
+managing specialized agents for complex tasks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()
 
diff --git a/python/samples/autogen-migration/single_agent/01_basic_assistant_agent.py b/python/samples/autogen-migration/single_agent/01_basic_assistant_agent.py
index 73a3caba02..fad39f7719 100644
--- a/python/samples/autogen-migration/single_agent/01_basic_assistant_agent.py
+++ b/python/samples/autogen-migration/single_agent/01_basic_assistant_agent.py
@@ -1,14 +1,9 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/01_basic_assistant_agent.py
-
 # Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from dotenv import load_dotenv
+
 """Basic AutoGen AssistantAgent vs Agent Framework Agent.
 
 Both samples expect OpenAI-compatible environment variables (OPENAI_API_KEY or
@@ -16,10 +11,6 @@ Azure OpenAI configuration). Update the prompts or client wiring to match your
 model of choice before running.
 """
 
-import asyncio
-
-from dotenv import load_dotenv
-
 # Load environment variables from .env file
 load_dotenv()
 
diff --git a/python/samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py b/python/samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py
index aca868b9f2..af7ebaf03b 100644
--- a/python/samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py
+++ b/python/samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py
@@ -1,24 +1,14 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-core",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen AssistantAgent vs Agent Framework Agent with function tools.
-
-Demonstrates how to create and attach tools to agents in both frameworks.
-"""
 
 import asyncio
 
 from dotenv import load_dotenv
 
+"""AutoGen AssistantAgent vs Agent Framework Agent with function tools.
+
+Demonstrates how to create and attach tools to agents in both frameworks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()
 
diff --git a/python/samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py b/python/samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py
index c544880cb1..9610f47ad2 100644
--- a/python/samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py
+++ b/python/samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py
@@ -1,23 +1,14 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen vs Agent Framework: Thread management and streaming responses.
-
-Demonstrates conversation state management and streaming in both frameworks.
-"""
 
 import asyncio
 
 from dotenv import load_dotenv
 
+"""AutoGen vs Agent Framework: Thread management and streaming responses.
+
+Demonstrates conversation state management and streaming in both frameworks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()
 
diff --git a/python/samples/autogen-migration/single_agent/04_agent_as_tool.py b/python/samples/autogen-migration/single_agent/04_agent_as_tool.py
index 489ec74c01..74a9fb3463 100644
--- a/python/samples/autogen-migration/single_agent/04_agent_as_tool.py
+++ b/python/samples/autogen-migration/single_agent/04_agent_as_tool.py
@@ -1,24 +1,15 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/04_agent_as_tool.py
-
 # Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from dotenv import load_dotenv
+
 """AutoGen vs Agent Framework: Agent-as-a-Tool pattern.
 
 Demonstrates hierarchical agent architectures where one agent delegates
 work to specialized sub-agents wrapped as tools.
 """
 
-import asyncio
-
-from dotenv import load_dotenv
-
 # Load environment variables from .env file
 load_dotenv()
 
@@ -107,6 +98,7 @@ async def run_agent_framework() -> None:
                 if content.type == "function_call":
                     # Accumulate function call content as it streams in
                     call_id = content.call_id
+                    assert call_id is not None, "Function call content must have a call_id"
                     if call_id in accumulated_calls:
                         # Add to existing call (arguments stream in gradually)
                         accumulated_calls[call_id] = accumulated_calls[call_id] + content
diff --git a/python/scripts/sample_validation/README.md b/python/scripts/sample_validation/README.md
index 064d9752da..d7d9f0a08a 100644
--- a/python/scripts/sample_validation/README.md
+++ b/python/scripts/sample_validation/README.md
@@ -165,18 +165,17 @@ Produces:
 
 ## Report Status Codes
 
-| Status  | Label     | Description                               |
-| ------- | --------- | ----------------------------------------- |
-| SUCCESS | [PASS]    | Sample ran to completion with exit code 0 |
-| FAILURE | [FAIL]    | Sample exited with non-zero code          |
-| TIMEOUT | [TIMEOUT] | Sample exceeded timeout limit             |
-| ERROR   | [ERROR]   | Exception during execution                |
+| Status        | Label           | Description                                                         |
+| ------------- | --------------- | ------------------------------------------------------------------- |
+| SUCCESS       | [PASS]          | Sample ran to completion with exit code 0                           |
+| FAILURE       | [FAIL]          | Sample did not complete successfully (e.g. non-zero exit code)      |
+| MISSING_SETUP | [MISSING_SETUP] | Sample reported missing required setup (e.g. environment variables) |
 
 ## Troubleshooting
 
 ### Agent output parsing errors
 
-If an agent returns non-JSON content, that sample is marked as `ERROR` with parser details in the report.
+If an agent returns non-JSON content, that sample is marked as `FAILURE` with parser details in the report.
 
 ### GitHub Copilot authentication or CLI issues
 
diff --git a/python/scripts/sample_validation/__main__.py b/python/scripts/sample_validation/__main__.py
index 5d222b94b9..948fed3a30 100644
--- a/python/scripts/sample_validation/__main__.py
+++ b/python/scripts/sample_validation/__main__.py
@@ -75,6 +75,13 @@ Examples:
         help="Custom name for the report files (without extension). If not provided, uses timestamp.",
     )
 
+    parser.add_argument(
+        "--exclude",
+        nargs="+",
+        type=str,
+        help="Subdirectory paths to exclude (relative to the search directory set by --subdir)",
+    )
+
     return parser.parse_args()
 
 
@@ -104,6 +111,7 @@ async def main() -> int:
         samples_dir=samples_dir,
         python_root=python_root,
         subdir=args.subdir,
+        exclude=args.exclude,
         max_parallel_workers=max(1, args.max_parallel_workers),
     )
 
@@ -138,7 +146,7 @@ async def main() -> int:
         print(f"   JSON: {json_path}")
 
     # Return appropriate exit code
-    failed = report.failure_count + report.timeout_count + report.error_count
+    failed = report.failure_count + report.missing_setup_count
     return 1 if failed > 0 else 0
 
 
diff --git a/python/scripts/sample_validation/aggregate.py b/python/scripts/sample_validation/aggregate.py
new file mode 100644
index 0000000000..478bfeafdb
--- /dev/null
+++ b/python/scripts/sample_validation/aggregate.py
@@ -0,0 +1,224 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Aggregate validation reports across runs and produce a trend report.
+
+Reads JSON reports from individual validation jobs, combines them with
+cached history from previous runs, and produces a markdown trend report
+showing per-sample status over the last 5 runs.
+
+Usage:
+    python aggregate.py <reports-dir> <history-file> <output-file>
+"""
+
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+MAX_HISTORY = 5  # number of most recent runs kept in the history file and shown in the trend report
+
+STATUS_EMOJI = {  # maps a result's status string to the symbol shown in the per-sample table
+    "success": "✅",
+    "failure": "❌",
+    "missing_setup": "⚠️",
+}
+
+
+def _format_run_label(timestamp: str) -> str:
+    """Return an ISO timestamp as a compact 'MM-DD HH:MM' label, or its first 16 characters if unparsable."""
+    try:
+        dt = datetime.fromisoformat(timestamp)
+        return dt.strftime("%m-%d %H:%M")
+    except (ValueError, TypeError):
+        return str(timestamp)[:16]  # str() keeps the fallback from raising on non-string values (e.g. None)
+
+
+def load_current_run(reports_dir: Path) -> dict[str, Any]:
+    """Load all JSON report files from the current run and merge them.
+
+    Args:
+        reports_dir: Directory holding one ``*.json`` report per validation job.
+
+    Returns:
+        A run record with a UTC ``timestamp``, a ``summary`` of counts added up
+        across the reports, and ``results`` mapping each sample path to its
+        status. With no report files, every count is 0 and ``results`` is empty.
+    """
+    combined_results: dict[str, str] = {}
+    # Counters use the same keys as each report's "summary" section.
+    totals = {
+        "total_samples": 0,
+        "success_count": 0,
+        "failure_count": 0,
+        "missing_setup_count": 0,
+    }
+
+    json_files = sorted(reports_dir.glob("*.json"))
+    if not json_files:
+        print(f"Warning: No JSON report files found in {reports_dir}")
+
+    for json_file in json_files:
+        print(f"  Loading report: {json_file.name}")
+        with open(json_file, encoding="utf-8") as f:
+            report = json.load(f)
+        for result in report["results"]:
+            # A path reported by more than one file keeps the last status read.
+            combined_results[result["path"]] = result["status"]
+        summary = report["summary"]
+        for key in totals:
+            totals[key] += summary[key]
+
+    return {
+        # Timezone-aware UTC, so labels derived from it do not depend on the host's local zone.
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "summary": totals,
+        "results": combined_results,
+    }
+
+
+def load_history(history_path: Path) -> list[dict[str, Any]]:
+    """Return the runs stored in the history cache file, or an empty list when the file is absent."""
+    if not history_path.exists():
+        print("  No previous history found")
+        return []
+    with history_path.open(encoding="utf-8") as cache_file:
+        cached = json.load(cache_file)
+    previous_runs = cached.get("runs", [])
+    print(f"  Loaded {len(previous_runs)} previous run(s) from history")
+    return previous_runs
+
+
+def save_history(history_path: Path, runs: list[dict[str, Any]]) -> None:
+    """Write the newest MAX_HISTORY runs to the history file as JSON, creating parent directories as needed."""
+    history_path.parent.mkdir(parents=True, exist_ok=True)
+    recent_runs = runs[-MAX_HISTORY:]
+    with history_path.open("w", encoding="utf-8") as cache_file:
+        json.dump({"runs": recent_runs}, cache_file, indent=2)
+    print(f"  Saved {len(recent_runs)} run(s) to history")
+
+
+def generate_trend_report(runs: list[dict[str, Any]]) -> str:
+    """Generate a markdown trend report from run history.
+
+    Args:
+        runs: Run records, oldest first, each with ``timestamp``, ``summary`` and ``results`` entries.
+
+    Returns:
+        The report as one markdown string.
+    """
+    lines = [
+        "# Sample Validation Trend Report",
+        "",
+        # Take the time in UTC so that the "UTC" suffix is true on any host.
+        f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*",
+        "",
+    ]
+    # Rows/columns left over when fewer than MAX_HISTORY runs exist are filled with N/A.
+    padding = MAX_HISTORY - len(runs)
+
+    # --- Overall status table (most recent first) ---
+    lines.append(f"## Overall Status (Last {MAX_HISTORY} Runs)")
+    lines.append("")
+    lines.append("| Run | Success | Failure | Missing Setup | Total |")
+    lines.append("|-----|---------|---------|---------------|-------|")
+
+    for run in reversed(runs):
+        s = run["summary"]
+        label = _format_run_label(run["timestamp"])
+        lines.append(
+            f"| {label} | {s['success_count']}/{s['total_samples']} "
+            f"| {s['failure_count']}/{s['total_samples']} "
+            f"| {s['missing_setup_count']}/{s['total_samples']} "
+            f"| {s['total_samples']} |"
+        )
+    lines.extend(["| N/A | N/A | N/A | N/A | N/A |"] * padding)
+    lines.append("")
+
+    # --- Per-sample results table ---
+    lines.append("## Per-Sample Results")
+    lines.append("")
+
+    # Collect all sample paths across all runs
+    all_paths: set[str] = set().union(*(run["results"] for run in runs))
+
+    if not all_paths:
+        lines.append("*No sample results available.*")
+        return "\n".join(lines)
+
+    # Build header (most recent run first)
+    header = "| Sample |"
+    separator = "|--------|"
+    for run in reversed(runs):
+        label = _format_run_label(run["timestamp"])
+        header += f" {label} |"
+        separator += "------------|"
+    header += " N/A |" * padding
+    separator += "-----|" * padding
+    lines.append(header)
+    lines.append(separator)
+
+    for path in sorted(all_paths):
+        row = f"| `{path}` |"
+        for run in reversed(runs):
+            # A sample missing from a run, or with an unrecognized status, shows as N/A.
+            emoji = STATUS_EMOJI.get(run["results"].get(path, "N/A"), "N/A")
+            row += f" {emoji} |"
+        lines.append(row + " N/A |" * padding)
+
+    lines.append("")
+    lines.append("**Legend:** ✅ Success · ❌ Failure · ⚠️ Missing Setup · N/A Not available")
+    lines.append("")
+
+    return "\n".join(lines)
+
+
+def main() -> int:
+    """Merge the current reports into the run history and write the trend report; return the exit code."""
+    if len(sys.argv) != 4:
+        print("Usage: python aggregate.py <reports-dir> <history-file> <output-file>")
+        return 1
+
+    # Positional arguments: reports directory, history cache file, report output file
+    reports_dir, history_path, output_path = (Path(arg) for arg in sys.argv[1:4])
+
+    print("Aggregating validation results...")
+
+    # Read and merge the reports produced by this run
+    print(f"\nLoading reports from {reports_dir}:")
+    current_run = load_current_run(reports_dir)
+    counts = current_run["summary"]
+    print(
+        f"  Current run: {counts['success_count']} success, "
+        f"{counts['failure_count']} failure, "
+        f"{counts['missing_setup_count']} missing setup "
+        f"(total: {counts['total_samples']})"
+    )
+
+    # Put the current run after the cached ones and keep the newest MAX_HISTORY
+    print(f"\nLoading history from {history_path}:")
+    history = load_history(history_path)
+    history.append(current_run)
+    history = history[-MAX_HISTORY:]
+
+    # Persist the updated history
+    print(f"\nSaving history to {history_path}:")
+    save_history(history_path, history)
+
+    # Render the trend report from the retained runs
+    print("\nGenerating trend report...")
+    trend_markdown = generate_trend_report(history)
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(trend_markdown, encoding="utf-8")
+    print(f"Trend report written to {output_path}")
+
+    # Echo the report to stdout below a separator line
+    print("\n" + "=" * 80)
+    print(trend_markdown)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
diff --git a/python/scripts/sample_validation/create_dynamic_workflow_executor.py b/python/scripts/sample_validation/create_dynamic_workflow_executor.py
index 69c5cc9a5e..4cffd5c71b 100644
--- a/python/scripts/sample_validation/create_dynamic_workflow_executor.py
+++ b/python/scripts/sample_validation/create_dynamic_workflow_executor.py
@@ -14,7 +14,8 @@ from agent_framework import (
     handler,
 )
 from agent_framework.github import GitHubCopilotAgent
-from copilot.types import PermissionRequest, PermissionRequestResult
+from copilot.generated.session_events import PermissionRequest
+from copilot.types import PermissionRequestResult
 from pydantic import BaseModel
 from typing_extensions import Never
 
@@ -36,6 +37,7 @@ class AgentResponseFormat(BaseModel):
     status: str
     output: str
     error: str
+    fix: str
 
 
 @dataclass
@@ -54,15 +56,20 @@ class BatchCompletion:
 
 AgentInstruction = (
     "You are validating exactly one Python sample.\n"
-    "Analyze the sample code and execute it. Based on the execution result, determine if it "
-    "runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
+    "Analyze the sample code and execute it as it is. Based on the execution result, determine "
+    "if it runs successfully, fails, or is missing_setup. Use `missing_setup` if the sample reports "
+    "missing required environment variables. The environment you're given should contain the necessary "
+    "variables. Don't create new environment variables nor modify the sample code.\n"
+    "Feel free to install any required dependencies if needed.\n"
     "The sample can be interactive. If it is interactive, respond to the sample when prompted "
     "based on your analysis of the code. You do not need to consult human on what to respond.\n"
+    "If the sample fails, investigate the error and suggest a fix.\n"
     "Return ONLY valid JSON with this schema:\n"
     "{\n"
-    '  "status": "success|failure|timeout|error",\n'
+    '  "status": "success|failure|missing_setup",\n'
     '  "output": "short summary of the result and what you did if the sample was interactive",\n'
-    '  "error": "error details or empty string"\n'
+    '  "error": "error details or empty string",\n'
+    '  "fix": "suggested code fix if the sample failed, otherwise empty string"\n'
     "}\n\n"
 )
 
@@ -87,16 +94,15 @@ def status_from_text(value: str) -> RunStatus:
     for status in RunStatus:
         if status.value == normalized:
             return status
-    return RunStatus.ERROR
+    return RunStatus.FAILURE
 
 
 def prompt_permission(
     request: PermissionRequest, context: dict[str, str]
 ) -> PermissionRequestResult:
     """Permission handler that always approves."""
-    kind = request.get("kind", "unknown")
     logger.debug(
-        f"[Permission Request: {kind}] ({context})Automatically approved for sample validation."
+        f"[Permission Request: {request.kind}] ({context})Automatically approved for sample validation."
     )
     return PermissionRequestResult(kind="approved")
 
@@ -108,39 +114,73 @@ class CustomAgentExecutor(Executor):
     returned as error responses, otherwise an exception in one agent could crash the entire workflow.
     """
 
+    # Retry in case GitHub Copilot agent encounters transient errors unrelated to the sample execution.
+    RETRY_COUNT = 1
+
     def __init__(self, agent: GitHubCopilotAgent):
         super().__init__(id=agent.id)
         self.agent = agent
+        self._session = agent.create_session()
 
     @handler
     async def handle_task(
         self, sample: SampleInfo, ctx: WorkflowContext[WorkerFreed | RunResult]
     ) -> None:
         """Execute one sample task and notify collector + coordinator."""
-        try:
-            response = await self.agent.run(
-                [
-                    Message(
-                        role="user",
-                        text=f"Validate the following sample:\n\n{sample.relative_path}",
+        current_retry = 0
+        while True:
+            try:
+                response = await self.agent.run(
+                    [
+                        Message(
+                            role="user",
+                            text=f"Validate the following sample:\n\n{sample.relative_path}",
+                        )
+                    ],
+                    session=self._session,
+                )
+                result_payload = parse_agent_json(response.text)
+                result = RunResult(
+                    sample=sample,
+                    status=status_from_text(result_payload.status),
+                    output=result_payload.output,
+                    error=result_payload.error,
+                    fix=result_payload.fix,
+                )
+                break
+            except Exception as ex:
+                if current_retry < self.RETRY_COUNT:
+                    logger.warning(
+                        f"Error executing agent {self.agent.id} (attempt {current_retry + 1}/{self.RETRY_COUNT}): {ex}. Retrying..."
                     )
-                ]
-            )
-            result_payload = parse_agent_json(response.text)
-            result = RunResult(
-                sample=sample,
-                status=status_from_text(result_payload.status),
-                output=result_payload.output,
-                error=result_payload.error,
-            )
-        except Exception as ex:
-            logger.error(f"Error executing agent {self.agent.id}: {ex}")
-            result = RunResult(
-                sample=sample,
-                status=RunStatus.ERROR,
-                output="",
-                error=str(ex),
-            )
+                    try:
+                        current_retry += 1
+                        await self.agent.stop()
+                        await self.agent.start()
+                        self._session = self.agent.create_session()  # Reset session for retry
+                        continue
+                    except Exception as restart_ex:
+                        logger.error(
+                            f"Error restarting agent {self.agent.id}: {restart_ex}. No more retries."
+                        )
+                        result = RunResult(
+                            sample=sample,
+                            status=RunStatus.FAILURE,
+                            output="",
+                            error=f"Original error: {ex}. Restart error: {restart_ex}",
+                            fix="",
+                        )
+                        break
+
+                logger.error(f"Error executing agent {self.agent.id}: {ex}")
+                result = RunResult(
+                    sample=sample,
+                    status=RunStatus.FAILURE,
+                    output="",
+                    error=str(ex),
+                    fix="",
+                )
+                break
 
         await ctx.send_message(result, target_id="collector")
         await ctx.send_message(WorkerFreed(worker_id=self.id), target_id="coordinator")
@@ -252,7 +292,7 @@ class CreateConcurrentValidationWorkflowExecutor(Executor):
                 instructions=AgentInstruction,
                 default_options={
                     "on_permission_request": prompt_permission,
-                    "timeout": 180,
+                    "timeout": 60,
                 },  # type: ignore
             )
             agents.append(agent)
diff --git a/python/scripts/sample_validation/discovery.py b/python/scripts/sample_validation/discovery.py
index 78eb1c9bfa..c5424dd6ee 100644
--- a/python/scripts/sample_validation/discovery.py
+++ b/python/scripts/sample_validation/discovery.py
@@ -52,13 +52,18 @@ def _has_main_entrypoint_guard(path: Path) -> bool:
     )
 
 
-def discover_samples(samples_dir: Path, subdir: str | None = None) -> list[SampleInfo]:
+def discover_samples(
+    samples_dir: Path,
+    subdir: str | None = None,
+    exclude: list[str] | None = None,
+) -> list[SampleInfo]:
     """
     Find all Python sample files in the samples directory.
 
     Args:
         samples_dir: Root samples directory
         subdir: Optional subdirectory to filter to
+        exclude: Optional list of subdirectory paths (relative to the search directory) to exclude
 
     Returns:
         List of SampleInfo objects for each discovered sample
@@ -72,12 +77,21 @@ def discover_samples(samples_dir: Path, subdir: str | None = None) -> list[Sampl
     else:
         search_dir = samples_dir
 
+    # Resolve excluded paths to absolute for reliable comparison
+    exclude_paths = {(search_dir / exc).resolve() for exc in (exclude or [])}
+
     python_files: list[Path] = []
 
     # Walk through all subdirectories and find .py files
     for root, dirs, files in os.walk(search_dir):
-        # Skip directories that start with _ (like _sample_validation)
-        dirs[:] = [d for d in dirs if not d.startswith("_") and d != "__pycache__"]
+        # Skip directories that start with _, __pycache__, or excluded paths
+        dirs[:] = [
+            d
+            for d in dirs
+            if not d.startswith("_")
+            and d != "__pycache__"
+            and (Path(root) / d).resolve() not in exclude_paths
+        ]
 
         for file in files:
             # Skip files that start with _ and include only scripts with a main entrypoint guard
@@ -113,8 +127,10 @@ class DiscoverSamplesExecutor(Executor):
         print(f"🔍 Discovering samples in {self.config.samples_dir}")
         if self.config.subdir:
             print(f"   Filtering to subdirectory: {self.config.subdir}")
+        if self.config.exclude:
+            print(f"   Excluding: {', '.join(self.config.exclude)}")
 
-        samples = discover_samples(self.config.samples_dir, self.config.subdir)
+        samples = discover_samples(self.config.samples_dir, self.config.subdir, self.config.exclude)
         print(f"   Found {len(samples)} samples")
 
         await ctx.send_message(DiscoveryResult(samples=samples))
diff --git a/python/scripts/sample_validation/models.py b/python/scripts/sample_validation/models.py
index ca9f26adab..ff45b5909b 100644
--- a/python/scripts/sample_validation/models.py
+++ b/python/scripts/sample_validation/models.py
@@ -18,6 +18,7 @@ class ValidationConfig:
     samples_dir: Path
     python_root: Path
     subdir: str | None = None
+    exclude: list[str] | None = None
     max_parallel_workers: int = 10
 
 
@@ -60,8 +61,7 @@ class RunStatus(Enum):
 
     SUCCESS = "success"
     FAILURE = "failure"
-    TIMEOUT = "timeout"
-    ERROR = "error"
+    MISSING_SETUP = "missing_setup"
 
 
 @dataclass
@@ -72,6 +72,7 @@ class RunResult:
     status: RunStatus
     output: str
     error: str
+    fix: str
 
 
 @dataclass
@@ -89,8 +90,7 @@ class Report:
     total_samples: int
     success_count: int
     failure_count: int
-    timeout_count: int
-    error_count: int
+    missing_setup_count: int
     results: list[RunResult] = field(default_factory=list)  # type: ignore
 
     def to_markdown(self) -> str:
@@ -107,15 +107,14 @@ class Report:
             f"| Total Samples | {self.total_samples} |",
             f"| [PASS] Success | {self.success_count} |",
             f"| [FAIL] Failure | {self.failure_count} |",
-            f"| [TIMEOUT] Timeout | {self.timeout_count} |",
-            f"| [ERROR] Error | {self.error_count} |",
+            f"| [MISSING_SETUP] Missing Setup | {self.missing_setup_count} |",
             "",
             "## Detailed Results",
             "",
         ]
 
         # Group by status
-        for status in [RunStatus.FAILURE, RunStatus.TIMEOUT, RunStatus.ERROR, RunStatus.SUCCESS]:
+        for status in [RunStatus.FAILURE, RunStatus.MISSING_SETUP, RunStatus.SUCCESS]:
             status_results = [r for r in self.results if r.status == status]
             if not status_results:
                 continue
@@ -123,8 +122,7 @@ class Report:
             status_label = {
                 RunStatus.SUCCESS: "[PASS]",
                 RunStatus.FAILURE: "[FAIL]",
-                RunStatus.TIMEOUT: "[TIMEOUT]",
-                RunStatus.ERROR: "[ERROR]",
+                RunStatus.MISSING_SETUP: "[MISSING_SETUP]",
             }
 
             lines.append(f"### {status_label[status]} {status.value.title()} ({len(status_results)})")
@@ -148,8 +146,7 @@ class Report:
                 "total_samples": self.total_samples,
                 "success_count": self.success_count,
                 "failure_count": self.failure_count,
-                "timeout_count": self.timeout_count,
-                "error_count": self.error_count,
+                "missing_setup_count": self.missing_setup_count,
             },
             "results": [
                 {
@@ -157,6 +154,7 @@ class Report:
                     "status": r.status.value,
                     "output": r.output,
                     "error": r.error,
+                    "fix": r.fix,
                 }
                 for r in self.results
             ],
diff --git a/python/scripts/sample_validation/report.py b/python/scripts/sample_validation/report.py
index db8eddeed1..10c4ff0406 100644
--- a/python/scripts/sample_validation/report.py
+++ b/python/scripts/sample_validation/report.py
@@ -22,12 +22,11 @@ def generate_report(results: list[RunResult]) -> Report:
     Returns:
         Report object with aggregated statistics
     """
-    # Sort results: failures, timeouts, errors first, then successes
+    # Sort results: failures, missing setup first, then successes
     status_priority = {
         RunStatus.FAILURE: 0,
-        RunStatus.TIMEOUT: 1,
-        RunStatus.ERROR: 2,
-        RunStatus.SUCCESS: 3,
+        RunStatus.MISSING_SETUP: 1,
+        RunStatus.SUCCESS: 2,
     }
     sorted_results = sorted(results, key=lambda r: status_priority[r.status])
 
@@ -36,8 +35,7 @@ def generate_report(results: list[RunResult]) -> Report:
         total_samples=len(results),
         success_count=sum(1 for r in results if r.status == RunStatus.SUCCESS),
         failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
-        timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
-        error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
+        missing_setup_count=sum(1 for r in results if r.status == RunStatus.MISSING_SETUP),
         results=sorted_results,
     )
 
@@ -86,8 +84,7 @@ def print_summary(report: Report) -> None:
 
     if (
         report.failure_count == 0
-        and report.timeout_count == 0
-        and report.error_count == 0
+        and report.missing_setup_count == 0
     ):
         print("[PASS] ALL SAMPLES PASSED!")
     else:
@@ -98,8 +95,7 @@ def print_summary(report: Report) -> None:
     print("Results:")
     print(f"  [PASS] Success: {report.success_count}")
     print(f"  [FAIL] Failure: {report.failure_count}")
-    print(f"  [TIMEOUT] Timeout: {report.timeout_count}")
-    print(f"  [ERR] Errors: {report.error_count}")
+    print(f"  [MISSING_SETUP] Missing Setup: {report.missing_setup_count}")
     print("=" * 80)
 
     # Print JSON output for GitHub Actions visibility
diff --git a/python/scripts/sample_validation/run_dynamic_validation_workflow_executor.py b/python/scripts/sample_validation/run_dynamic_validation_workflow_executor.py
index 6f28dc9244..c7244cff2a 100644
--- a/python/scripts/sample_validation/run_dynamic_validation_workflow_executor.py
+++ b/python/scripts/sample_validation/run_dynamic_validation_workflow_executor.py
@@ -66,9 +66,10 @@ class RunDynamicValidationWorkflowExecutor(Executor):
                 fallback_results = [
                     RunResult(
                         sample=sample,
-                        status=RunStatus.ERROR,
+                        status=RunStatus.FAILURE,
                         output="",
                         error="Nested workflow did not return an ExecutionResult.",
+                        fix="",
                     )
                     for sample in creation.samples
                 ]