Python: Update sample validation scripts (#4870)

* Update sample validation scripts * Adjust prompt * Update autogen-migration samples * Add fix suggestion * Split jobs * Add .env * Create trend report * Add timestamp * Add more env vars * Comments * force node24 * force node24 * force node22
2026-06-16 21:04:09 +08:00 · 2026-03-24 18:21:32 -07:00
parent 2c000b032d
commit 4b533608b6
19 changed files with 928 additions and 202 deletions
@@ -24,7 +24,9 @@ runs:
  using: "composite"
  steps:
    - name: Set up Node.js environment
-      uses: actions/setup-node@v4
+      uses: actions/setup-node@v6
+      with:
+        node-version: 22

    - name: Install Copilot CLI
      shell: bash
@@ -41,6 +41,13 @@ jobs:
          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
          os: ${{ runner.os }}

+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+
      - name: Run sample validation
        run: |
          cd scripts && uv run python -m sample_validation --subdir 01-get-started --save-report --report-name 01-get-started
@@ -50,7 +57,7 @@ jobs:
        if: always()
        with:
          name: validation-report-01-get-started
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/

  validate-02-agents:
    name: Validate 02-agents
@@ -64,10 +71,13 @@ jobs:
      AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
      AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
      AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
      # OpenAI configuration
      OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
      OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
      OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      # GitHub MCP
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      # Observability
      ENABLE_INSTRUMENTATION: "true"
    defaults:
@@ -84,16 +94,420 @@ jobs:
          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
          os: ${{ runner.os }}

+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME=$AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+          echo "GITHUB_PAT=$GITHUB_PAT" >> .env
+
      - name: Run sample validation
        run: |
-          cd scripts && uv run python -m sample_validation --subdir 02-agents --save-report --report-name 02-agents
+          cd scripts && uv run python -m sample_validation --subdir 02-agents --exclude providers --save-report --report-name 02-agents

      - name: Upload validation report
        uses: actions/upload-artifact@v7
        if: always()
        with:
          name: validation-report-02-agents
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-openai:  # Validates the samples under 02-agents/providers/openai
+    name: Validate 02-agents/providers/openai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # OpenAI settings; the same three values are appended to .env below
+      OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
+      OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
+      OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/openai --save-report --report-name 02-agents-openai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-openai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-openai:  # Validates the samples under 02-agents/providers/azure_openai
+    name: Validate 02-agents/providers/azure_openai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # Azure endpoints and deployment names; the same four values are appended to .env below
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
+      AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_openai --save-report --report-name 02-agents-azure-openai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-azure-openai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-ai:  # Validates the samples under 02-agents/providers/azure_ai
+    name: Validate 02-agents/providers/azure_ai
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # the three AZURE_AI_*_DEPLOYMENT_NAME values are sourced from the AZUREOPENAI__* variables
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
+      BING_CONNECTION_ID: ${{ secrets.BING_CONNECTION_ID }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME=$AZURE_AI_CHAT_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME=$AZURE_AI_EMBEDDING_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "BING_CONNECTION_ID=$BING_CONNECTION_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_ai --save-report --report-name 02-agents-azure-ai
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-azure-ai
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-azure-ai-agent:  # Validates the samples under 02-agents/providers/azure_ai_agent
+    name: Validate 02-agents/providers/azure_ai_agent
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # project endpoint plus a model deployment name taken from the AZUREOPENAI__ responses variable
+      AZURE_AI_PROJECT_ENDPOINT: ${{ vars.AZURE_AI_PROJECT_ENDPOINT }}
+      AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/azure_ai_agent --save-report --report-name 02-agents-azure-ai-agent
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-azure-ai-agent
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-anthropic:  # Validates the samples under 02-agents/providers/anthropic
+    name: Validate 02-agents/providers/anthropic
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # Anthropic settings; the same two values are appended to .env below
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      ANTHROPIC_CHAT_MODEL_ID: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+          echo "ANTHROPIC_CHAT_MODEL_ID=$ANTHROPIC_CHAT_MODEL_ID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/anthropic --save-report --report-name 02-agents-anthropic
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-anthropic
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-github-copilot:  # Validates the samples under 02-agents/providers/github_copilot
+    name: Validate 02-agents/providers/github_copilot
+    runs-on: ubuntu-latest
+    environment: integration  # NOTE(review): no job-level env and no .env step here; presumably auth comes from the setup action - confirm
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/github_copilot --save-report --report-name 02-agents-github-copilot
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-github-copilot
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-amazon:  # Validates the samples under 02-agents/providers/amazon (currently never runs)
+    name: Validate 02-agents/providers/amazon
+    if: false  # Temporarily disabled - requires AWS credentials
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # NOTE(review): unlike the enabled provider jobs, no step copies this value into .env - confirm before re-enabling
+      BEDROCK_CHAT_MODEL_ID: ${{ vars.BEDROCK__CHATMODELID }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/amazon --save-report --report-name 02-agents-amazon
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-amazon
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-ollama:  # Validates the samples under 02-agents/providers/ollama (currently never runs)
+    name: Validate 02-agents/providers/ollama
+    if: false  # Temporarily disabled - requires local Ollama server
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # NOTE(review): unlike the enabled provider jobs, no step copies this value into .env - confirm before re-enabling
+      OLLAMA_MODEL: ${{ vars.OLLAMA__MODEL }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/ollama --save-report --report-name 02-agents-ollama
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-ollama
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-foundry-local:  # Validates the samples under 02-agents/providers/foundry_local (currently never runs)
+    name: Validate 02-agents/providers/foundry_local
+    if: false  # Temporarily disabled - requires local Foundry setup
+    runs-on: ubuntu-latest
+    environment: integration  # no job-level env and no .env step are defined for this job
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/foundry_local --save-report --report-name 02-agents-foundry-local
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-foundry-local
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-copilotstudio:  # Validates the samples under 02-agents/providers/copilotstudio (currently never runs)
+    name: Validate 02-agents/providers/copilotstudio
+    if: false  # Temporarily disabled - requires Copilot Studio setup
+    runs-on: ubuntu-latest
+    environment: integration
+    env:  # all four values come from secrets and are appended to .env below
+      COPILOTSTUDIOAGENT__ENVIRONMENTID: ${{ secrets.COPILOTSTUDIOAGENT__ENVIRONMENTID }}
+      COPILOTSTUDIOAGENT__SCHEMANAME: ${{ secrets.COPILOTSTUDIOAGENT__SCHEMANAME }}
+      COPILOTSTUDIOAGENT__TENANTID: ${{ secrets.COPILOTSTUDIOAGENT__TENANTID }}
+      COPILOTSTUDIOAGENT__AGENTAPPID: ${{ secrets.COPILOTSTUDIOAGENT__AGENTAPPID }}
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/, so .env is written to python/.env
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Create .env for samples
+        run: |
+          echo "COPILOTSTUDIOAGENT__ENVIRONMENTID=$COPILOTSTUDIOAGENT__ENVIRONMENTID" >> .env
+          echo "COPILOTSTUDIOAGENT__SCHEMANAME=$COPILOTSTUDIOAGENT__SCHEMANAME" >> .env
+          echo "COPILOTSTUDIOAGENT__TENANTID=$COPILOTSTUDIOAGENT__TENANTID" >> .env
+          echo "COPILOTSTUDIOAGENT__AGENTAPPID=$COPILOTSTUDIOAGENT__AGENTAPPID" >> .env
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/copilotstudio --save-report --report-name 02-agents-copilotstudio
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-copilotstudio
+          path: python/samples/sample_validation/reports/
+
+  validate-02-agents-custom:  # Validates the samples under 02-agents/providers/custom
+    name: Validate 02-agents/providers/custom
+    runs-on: ubuntu-latest
+    environment: integration  # no job-level env and no .env step are defined for this job
+    defaults:
+      run:
+        working-directory: python  # run steps start in python/
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup environment
+        uses: ./.github/actions/sample-validation-setup
+        with:
+          azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          os: ${{ runner.os }}
+
+      - name: Run sample validation
+        run: |
+          cd scripts && uv run python -m sample_validation --subdir 02-agents/providers/custom --save-report --report-name 02-agents-custom
+
+      - name: Upload validation report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload the report even when an earlier step failed
+        with:
+          name: validation-report-02-agents-custom
+          path: python/samples/sample_validation/reports/

  validate-03-workflows:
    name: Validate 03-workflows
@@ -121,6 +535,14 @@ jobs:
          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
          os: ${{ runner.os }}

+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+
      - name: Run sample validation
        run: |
          cd scripts && uv run python -m sample_validation --subdir 03-workflows --save-report --report-name 03-workflows
@@ -130,7 +552,7 @@ jobs:
        if: always()
        with:
          name: validation-report-03-workflows
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/

  validate-04-hosting:
    name: Validate 04-hosting
@@ -169,7 +591,7 @@ jobs:
        if: always()
        with:
          name: validation-report-04-hosting
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/

  validate-05-end-to-end:
    name: Validate 05-end-to-end
@@ -213,7 +635,7 @@ jobs:
        if: always()
        with:
          name: validation-report-05-end-to-end
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/

  validate-autogen-migration:
    name: Validate autogen-migration
@@ -244,6 +666,16 @@ jobs:
          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
          os: ${{ runner.os }}

+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+
      - name: Run sample validation
        run: |
          cd scripts && uv run python -m sample_validation --subdir autogen-migration --save-report --report-name autogen-migration
@@ -253,7 +685,7 @@ jobs:
        if: always()
        with:
          name: validation-report-autogen-migration
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/

  validate-semantic-kernel-migration:
    name: Validate semantic-kernel-migration
@@ -290,6 +722,21 @@ jobs:
          azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
          os: ${{ runner.os }}

+      - name: Create .env for samples
+        run: |
+          echo "AZURE_AI_PROJECT_ENDPOINT=$AZURE_AI_PROJECT_ENDPOINT" >> .env
+          echo "AZURE_AI_MODEL_DEPLOYMENT_NAME=$AZURE_AI_MODEL_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_ENDPOINT=$AZURE_OPENAI_ENDPOINT" >> .env
+          echo "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=$AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" >> .env
+          echo "AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=$AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME" >> .env
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> .env
+          echo "OPENAI_CHAT_MODEL_ID=$OPENAI_CHAT_MODEL_ID" >> .env
+          echo "OPENAI_RESPONSES_MODEL_ID=$OPENAI_RESPONSES_MODEL_ID" >> .env
+          echo "COPILOTSTUDIOAGENT__ENVIRONMENTID=$COPILOTSTUDIOAGENT__ENVIRONMENTID" >> .env
+          echo "COPILOTSTUDIOAGENT__SCHEMANAME=$COPILOTSTUDIOAGENT__SCHEMANAME" >> .env
+          echo "COPILOTSTUDIOAGENT__TENANTID=$COPILOTSTUDIOAGENT__TENANTID" >> .env
+          echo "COPILOTSTUDIOAGENT__AGENTAPPID=$COPILOTSTUDIOAGENT__AGENTAPPID" >> .env
+
      - name: Run sample validation
        run: |
          cd scripts && uv run python -m sample_validation --subdir semantic-kernel-migration --save-report --report-name semantic-kernel-migration
@@ -299,4 +746,69 @@ jobs:
        if: always()
        with:
          name: validation-report-semantic-kernel-migration
-          path: python/scripts/sample_validation/reports/
+          path: python/samples/sample_validation/reports/
+
+  aggregate-results:  # Collects every validation report, updates the cached history and publishes a trend report
+    name: Aggregate Results
+    runs-on: ubuntu-latest
+    if: always()  # run even when validation jobs failed or were skipped
+    needs:
+      - validate-01-get-started
+      - validate-02-agents
+      - validate-02-agents-openai
+      - validate-02-agents-azure-openai
+      - validate-02-agents-azure-ai
+      - validate-02-agents-azure-ai-agent
+      - validate-02-agents-anthropic
+      - validate-02-agents-github-copilot
+      - validate-02-agents-amazon
+      - validate-02-agents-ollama
+      - validate-02-agents-foundry-local
+      - validate-02-agents-copilotstudio
+      - validate-02-agents-custom
+      - validate-03-workflows
+      - validate-04-hosting
+      - validate-05-end-to-end
+      - validate-autogen-migration
+      - validate-semantic-kernel-migration
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Download all validation reports
+        uses: actions/download-artifact@v7
+        with:
+          pattern: validation-report-*
+          path: reports/
+          merge-multiple: true  # place the contents of all matching artifacts directly in reports/
+
+      - name: Restore validation history
+        id: cache-restore
+        uses: actions/cache/restore@v4
+        with:
+          path: validation-history/
+          key: validation-history-${{ github.run_id }}-${{ github.run_attempt }}  # unique per attempt; restore-keys supplies the most recent history
+          restore-keys: |
+            validation-history-
+
+      - name: Aggregate results and generate trend report
+        run: |
+          python3 python/scripts/sample_validation/aggregate.py \
+            reports/ \
+            validation-history/history.json \
+            trend-report.md
+
+      - name: Write trend report to job summary
+        run: cat trend-report.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Save validation history
+        uses: actions/cache/save@v4
+        with:
+          path: validation-history/
+          key: validation-history-${{ github.run_id }}-${{ github.run_attempt }}  # run_id is reused on re-runs and cache entries are immutable, so add the attempt
+
+      - name: Upload trend report
+        uses: actions/upload-artifact@v7
+        if: always()  # upload whatever trend report exists even when an earlier step failed
+        with:
+          name: validation-trend-report
+          path: trend-report.md
@@ -5,7 +5,7 @@ import os
 from random import randint
 from typing import Annotated, Any, Literal

-from agent_framework import SupportsChatGetResponse, tool
+from agent_framework import Message, SupportsChatGetResponse, tool
 from agent_framework.azure import (
    AzureAIAgentClient,
    AzureOpenAIAssistantsClient,
@@ -117,35 +117,37 @@ async def main(client_name: ClientName = "openai_chat") -> None:
    client = get_client(client_name)

    # 1. Configure prompt and streaming mode.
-    message = "What's the weather in Amsterdam and in Paris?"
+    message = Message("user", text="What's the weather in Amsterdam and in Paris?")
    stream = os.getenv("STREAM", "false").lower() == "true"
    print(f"Client: {client_name}")
-    print(f"User: {message}")
+    print(f"User: {message.text}")

    # 2. Run with context-managed clients.
    if isinstance(client, OpenAIAssistantsClient | AzureOpenAIAssistantsClient | AzureAIAgentClient):
        async with client:
            if stream:
-                response_stream = client.get_response(message, stream=True, options={"tools": get_weather})
+                response_stream = client.get_response([message], stream=True, options={"tools": get_weather})
                print("Assistant: ", end="")
                async for chunk in response_stream:
                    if chunk.text:
                        print(chunk.text, end="")
                print("")
            else:
-                print(f"Assistant: {await client.get_response(message, stream=False, options={'tools': get_weather})}")
+                print(
+                    f"Assistant: {await client.get_response([message], stream=False, options={'tools': get_weather})}"
+                )
        return

    # 3. Run with non-context-managed clients.
    if stream:
-        response_stream = client.get_response(message, stream=True, options={"tools": get_weather})
+        response_stream = client.get_response([message], stream=True, options={"tools": get_weather})
        print("Assistant: ", end="")
        async for chunk in response_stream:
            if chunk.text:
                print(chunk.text, end="")
        print("")
    else:
-        print(f"Assistant: {await client.get_response(message, stream=False, options={'tools': get_weather})}")
+        print(f"Assistant: {await client.get_response([message], stream=False, options={'tools': get_weather})}")


 if __name__ == "__main__":
@@ -1,25 +1,17 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/01_round_robin_group_chat.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen RoundRobinGroupChat vs Agent Framework GroupChatBuilder/SequentialBuilder.

-Demonstrates sequential agent orchestration where agents take turns processing
-the task in a round-robin fashion.
-"""

 import asyncio

 from agent_framework import Message
 from dotenv import load_dotenv

+"""AutoGen RoundRobinGroupChat vs Agent Framework GroupChatBuilder/SequentialBuilder.
+
+Demonstrates sequential agent orchestration where agents take turns processing
+the task in a round-robin fashion.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -98,7 +90,7 @@ async def run_agent_framework() -> None:
    print("[Agent Framework] Sequential conversation:")
    async for event in workflow.run("Create a brief summary about electric vehicles", stream=True):
        if event.type == "output" and isinstance(event.data, list):
-            for message in event.data:
+            for message in event.data:  # type: ignore
                if isinstance(message, Message) and message.role == "assistant" and message.text:
                    print(f"---------- {message.author_name} ----------")
                    print(message.text)
@@ -144,9 +136,7 @@ async def run_agent_framework_with_cycle() -> None:
        if last_message and "APPROVED" in last_message.text:
            await context.yield_output("Content approved.")
        else:
-            await context.send_message(
-                AgentExecutorRequest(messages=response.full_conversation, should_respond=True)
-            )
+            await context.send_message(AgentExecutorRequest(messages=response.full_conversation, should_respond=True))

    workflow = (
        WorkflowBuilder(start_executor=researcher)
@@ -1,25 +1,17 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/02_selector_group_chat.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen SelectorGroupChat vs Agent Framework GroupChatBuilder.

-Demonstrates LLM-based speaker selection where an orchestrator decides
-which agent should speak next based on the conversation context.
-"""

 import asyncio

 from agent_framework import Message
 from dotenv import load_dotenv

+"""AutoGen SelectorGroupChat vs Agent Framework GroupChatBuilder.
+
+Demonstrates LLM-based speaker selection where an orchestrator decides
+which agent should speak next based on the conversation context.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -113,7 +105,7 @@ async def run_agent_framework() -> None:
    print("[Agent Framework] Group chat conversation:")
    async for event in workflow.run("How do I connect to a PostgreSQL database using Python?", stream=True):
        if event.type == "output" and isinstance(event.data, list):
-            for message in event.data:
+            for message in event.data:  # type: ignore
                if isinstance(message, Message) and message.role == "assistant" and message.text:
                    print(f"---------- {message.author_name} ----------")
                    print(message.text)
@@ -1,19 +1,4 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/03_swarm.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen Swarm pattern vs Agent Framework HandoffBuilder.
-
-Demonstrates agent handoff coordination where agents can transfer control
-to other specialized agents based on the task requirements.
-"""

 import asyncio
 from typing import Any
@@ -21,6 +6,12 @@ from typing import Any
 from agent_framework import AgentResponseUpdate, WorkflowEvent
 from dotenv import load_dotenv

+"""AutoGen Swarm pattern vs Agent Framework HandoffBuilder.
+
+Demonstrates agent handoff coordination where agents can transfer control
+to other specialized agents based on the task requirements.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -1,19 +1,4 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/orchestrations/04_magentic_one.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen MagenticOneGroupChat vs Agent Framework MagenticBuilder.
-
-Demonstrates orchestrated multi-agent workflows with a central coordinator
-managing specialized agents for complex tasks.
-"""

 import asyncio
 import json
@@ -27,6 +12,12 @@ from agent_framework import (
 from agent_framework.orchestrations import MagenticProgressLedger
 from dotenv import load_dotenv

+"""AutoGen MagenticOneGroupChat vs Agent Framework MagenticBuilder.
+
+Demonstrates orchestrated multi-agent workflows with a central coordinator
+managing specialized agents for complex tasks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -1,14 +1,9 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/01_basic_assistant_agent.py
-
 # Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from dotenv import load_dotenv
+
 """Basic AutoGen AssistantAgent vs Agent Framework Agent.

 Both samples expect OpenAI-compatible environment variables (OPENAI_API_KEY or
@@ -16,10 +11,6 @@ Azure OpenAI configuration). Update the prompts or client wiring to match your
 model of choice before running.
 """

-import asyncio
-
-from dotenv import load_dotenv
-
 # Load environment variables from .env file
 load_dotenv()

@@ -1,24 +1,14 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-core",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/02_assistant_agent_with_tool.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen AssistantAgent vs Agent Framework Agent with function tools.
-
-Demonstrates how to create and attach tools to agents in both frameworks.
-"""

 import asyncio

 from dotenv import load_dotenv

+"""AutoGen AssistantAgent vs Agent Framework Agent with function tools.
+
+Demonstrates how to create and attach tools to agents in both frameworks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -1,23 +1,14 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/03_assistant_agent_thread_and_stream.py
-
 # Copyright (c) Microsoft. All rights reserved.
-"""AutoGen vs Agent Framework: Thread management and streaming responses.
-
-Demonstrates conversation state management and streaming in both frameworks.
-"""

 import asyncio

 from dotenv import load_dotenv

+"""AutoGen vs Agent Framework: Thread management and streaming responses.
+
+Demonstrates conversation state management and streaming in both frameworks.
+"""
+
 # Load environment variables from .env file
 load_dotenv()

@@ -1,24 +1,15 @@
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#     "autogen-agentchat",
-#     "autogen-ext[openai]",
-# ]
-# ///
-# Run with any PEP 723 compatible runner, e.g.:
-#   uv run samples/autogen-migration/single_agent/04_agent_as_tool.py
-
 # Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from dotenv import load_dotenv
+
 """AutoGen vs Agent Framework: Agent-as-a-Tool pattern.

 Demonstrates hierarchical agent architectures where one agent delegates
 work to specialized sub-agents wrapped as tools.
 """

-import asyncio
-
-from dotenv import load_dotenv
-
 # Load environment variables from .env file
 load_dotenv()

@@ -107,6 +98,7 @@ async def run_agent_framework() -> None:
                if content.type == "function_call":
                    # Accumulate function call content as it streams in
                    call_id = content.call_id
+                    assert call_id is not None, "Function call content must have a call_id"
                    if call_id in accumulated_calls:
                        # Add to existing call (arguments stream in gradually)
                        accumulated_calls[call_id] = accumulated_calls[call_id] + content
@@ -165,18 +165,17 @@ Produces:

 ## Report Status Codes

-| Status  | Label     | Description                               |
-| ------- | --------- | ----------------------------------------- |
-| SUCCESS | [PASS]    | Sample ran to completion with exit code 0 |
-| FAILURE | [FAIL]    | Sample exited with non-zero code          |
-| TIMEOUT | [TIMEOUT] | Sample exceeded timeout limit             |
-| ERROR   | [ERROR]   | Exception during execution                |
+| Status        | Label           | Description                                               |
+| ------------- | --------------- | --------------------------------------------------------- |
+| SUCCESS       | [PASS]          | Sample ran to completion with exit code 0                 |
+| FAILURE       | [FAIL]          | Sample did not complete successfully (non-zero exit code) |
+| MISSING_SETUP | [MISSING_SETUP] | Sample reported missing required environment variables    |

 ## Troubleshooting

 ### Agent output parsing errors

-If an agent returns non-JSON content, that sample is marked as `ERROR` with parser details in the report.
+If an agent returns non-JSON content, that sample is marked as `FAILURE` with parser details in the report.

 ### GitHub Copilot authentication or CLI issues

@@ -75,6 +75,13 @@ Examples:
        help="Custom name for the report files (without extension). If not provided, uses timestamp.",
    )

+    parser.add_argument(
+        "--exclude",
+        nargs="+",
+        type=str,
+        help="Subdirectory paths to exclude (relative to the search directory set by --subdir)",
+    )
+
    return parser.parse_args()


@@ -104,6 +111,7 @@ async def main() -> int:
        samples_dir=samples_dir,
        python_root=python_root,
        subdir=args.subdir,
+        exclude=args.exclude,
        max_parallel_workers=max(1, args.max_parallel_workers),
    )

@@ -138,7 +146,7 @@ async def main() -> int:
        print(f"   JSON: {json_path}")

    # Return appropriate exit code
-    failed = report.failure_count + report.timeout_count + report.error_count
+    failed = report.failure_count + report.missing_setup_count
    return 1 if failed > 0 else 0


@@ -0,0 +1,224 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Aggregate validation reports across runs and produce a trend report.
+
+Reads JSON reports from individual validation jobs, combines them with
+cached history from previous runs, and produces a markdown trend report
+showing per-sample status over the last 5 runs.
+
+Usage:
+    python aggregate.py <reports-dir> <history-file> <output-file>
+"""
+
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+MAX_HISTORY = 5
+
+STATUS_EMOJI = {
+    "success": "✅",
+    "failure": "❌",
+    "missing_setup": "⚠️",
+}
+
+
+def _format_run_label(timestamp: str) -> str:
+    """Format an ISO timestamp as 'MM-DD HH:MM'; unparseable input falls back to its first 16 chars."""
+    try:
+        return datetime.fromisoformat(timestamp).strftime("%m-%d %H:%M")
+    except (ValueError, TypeError):
+        # TypeError means a non-string (e.g. null in cached history); str() keeps the fallback from raising.
+        return str(timestamp)[:16]
+
+
+def load_current_run(reports_dir: Path) -> dict[str, Any]:
+    """Merge every ``*.json`` report in ``reports_dir`` into a single run record.
+
+    Args:
+        reports_dir: Directory holding the JSON reports of the current run.
+
+    Returns:
+        A dict with ``timestamp`` (timezone-aware UTC ISO string, so run labels
+        agree with the UTC-labelled trend report on any machine), ``summary``
+        (the four counters summed over all reports) and ``results`` (sample
+        path -> status; for a path present in several reports the file that
+        sorts last wins).
+
+    Raises:
+        KeyError: If a report lacks ``results``, ``summary`` or one of the counters.
+    """
+    counter_keys = ("total_samples", "success_count", "failure_count", "missing_setup_count")
+    totals = dict.fromkeys(counter_keys, 0)
+    status_by_path: dict[str, str] = {}
+
+    report_files = sorted(reports_dir.glob("*.json"))
+    if not report_files:
+        # No early return needed: the loop below is skipped and the zeroed record is returned.
+        print(f"Warning: No JSON report files found in {reports_dir}")
+
+    for report_file in report_files:
+        print(f"  Loading report: {report_file.name}")
+        with open(report_file, encoding="utf-8") as f:
+            report = json.load(f)
+        for result in report["results"]:
+            status_by_path[result["path"]] = result["status"]
+        summary = report["summary"]
+        for key in counter_keys:
+            totals[key] += summary[key]
+
+    # Stamp the run in UTC rather than naive local time.
+    return {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "summary": totals,
+        "results": status_by_path,
+    }
+
+
+def load_history(history_path: Path) -> list[dict[str, Any]]:
+    """Return the ``runs`` list stored in the cached history file, or ``[]`` when the file is absent."""
+    if not history_path.exists():
+        print("  No previous history found")
+        return []
+    # A history file without a "runs" key is treated as empty rather than as an error.
+    cached = json.loads(history_path.read_text(encoding="utf-8"))
+    previous_runs = cached.get("runs", [])
+    print(f"  Loaded {len(previous_runs)} previous run(s) from history")
+    return previous_runs
+
+
+def save_history(history_path: Path, runs: list[dict[str, Any]]) -> None:
+    """Write the newest MAX_HISTORY runs to ``history_path`` as JSON, creating parent directories."""
+    history_path.parent.mkdir(parents=True, exist_ok=True)
+    kept = runs[-MAX_HISTORY:]
+    # json.dumps(..., indent=2) produces the same text json.dump would stream to the file.
+    history_path.write_text(json.dumps({"runs": kept}, indent=2), encoding="utf-8")
+    print(f"  Saved {len(kept)} run(s) to history")
+
+
+def generate_trend_report(runs: list[dict[str, Any]]) -> str:
+    """Render the run history as a markdown trend report, most recent run first.
+
+    Args:
+        runs: Run records ordered oldest to newest, each holding ``timestamp``,
+            ``summary`` (the four counters) and ``results`` (sample path -> status).
+
+    Returns:
+        Markdown with an overall status table and a per-sample table; both are
+        padded with ``N/A`` entries when fewer than MAX_HISTORY runs are given.
+    """
+    newest_first = list(reversed(runs))
+    padding = max(0, MAX_HISTORY - len(runs))
+
+    lines = [
+        "# Sample Validation Trend Report",
+        "",
+        # The label says UTC, so take the time in UTC instead of the machine's local zone.
+        f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*",
+        "",
+        # Derive the heading from MAX_HISTORY so it cannot drift from the retention limit.
+        f"## Overall Status (Last {MAX_HISTORY} Runs)",
+        "",
+        "| Run | Success | Failure | Missing Setup | Total |",
+        "|-----|---------|---------|---------------|-------|",
+    ]
+
+    # One summary row per run.
+    for run in newest_first:
+        s = run["summary"]
+        total = s["total_samples"]
+        lines.append(
+            f"| {_format_run_label(run['timestamp'])} | {s['success_count']}/{total} "
+            f"| {s['failure_count']}/{total} | {s['missing_setup_count']}/{total} | {total} |"
+        )
+    # Pad the table to a fixed height when fewer than MAX_HISTORY runs exist.
+    lines.extend(["| N/A | N/A | N/A | N/A | N/A |"] * padding)
+    lines.extend(["", "## Per-Sample Results", ""])
+
+    # Union of sample paths over all runs: a sample absent from a run shows as N/A there.
+    all_paths: set[str] = set()
+    for run in runs:
+        all_paths.update(run["results"].keys())
+
+    if not all_paths:
+        lines.append("*No sample results available.*")
+        return "\n".join(lines)
+
+    # Header and separator rows: one column per run, then N/A filler columns.
+    header = "| Sample |"
+    separator = "|--------|"
+    for run in newest_first:
+        header += f" {_format_run_label(run['timestamp'])} |"
+        separator += "------------|"
+    header += " N/A |" * padding
+    separator += "-----|" * padding
+    lines.extend([header, separator])
+
+    # One row per sample, sorted by path for a stable layout.
+    for path in sorted(all_paths):
+        row = f"| `{path}` |"
+        for run in newest_first:
+            # Unknown status strings render as N/A, the same as a missing result.
+            status = run["results"].get(path, "N/A")
+            row += f" {STATUS_EMOJI.get(status, 'N/A')} |"
+        row += " N/A |" * padding
+        lines.append(row)
+
+    lines.append("")
+    lines.append("**Legend:** ✅ Success · ❌ Failure · ⚠️ Missing Setup · N/A Not available")
+    lines.append("")
+
+    return "\n".join(lines)
+
+
+def main() -> int:
+    """Command-line entry point: merge the current reports into the history and write the trend report.
+
+    Returns:
+        0 on success, 1 when the three positional arguments are not supplied.
+    """
+    cli_args = sys.argv[1:]
+    if len(cli_args) != 3:
+        print("Usage: python aggregate.py <reports-dir> <history-file> <output-file>")
+        return 1
+    reports_dir, history_path, output_path = (Path(arg) for arg in cli_args)
+
+    print("Aggregating validation results...")
+
+    # Step 1: fold this run's per-job reports into one record.
+    print(f"\nLoading reports from {reports_dir}:")
+    current_run = load_current_run(reports_dir)
+    counts = current_run["summary"]
+    print(
+        f"  Current run: {counts['success_count']} success, "
+        f"{counts['failure_count']} failure, "
+        f"{counts['missing_setup_count']} missing setup "
+        f"(total: {counts['total_samples']})"
+    )
+
+    # Step 2: extend the cached history with this run, keeping the newest MAX_HISTORY entries.
+    print(f"\nLoading history from {history_path}:")
+    runs = [*load_history(history_path), current_run][-MAX_HISTORY:]
+
+    # Step 3: persist the trimmed history for the next invocation.
+    print(f"\nSaving history to {history_path}:")
+    save_history(history_path, runs)
+
+    # Step 4: render the report and write it to the output file.
+    print("\nGenerating trend report...")
+    report = generate_trend_report(runs)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(report, encoding="utf-8")
+    print(f"Trend report written to {output_path}")
+
+    # Step 5: echo the report to stdout below a separator line.
+    print("\n" + "=" * 80)
+    print(report)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -14,7 +14,8 @@ from agent_framework import (
    handler,
 )
 from agent_framework.github import GitHubCopilotAgent
-from copilot.types import PermissionRequest, PermissionRequestResult
+from copilot.generated.session_events import PermissionRequest
+from copilot.types import PermissionRequestResult
 from pydantic import BaseModel
 from typing_extensions import Never

@@ -36,6 +37,7 @@ class AgentResponseFormat(BaseModel):
    status: str
    output: str
    error: str
+    fix: str


@dataclass
@@ -54,15 +56,20 @@ class BatchCompletion:

 AgentInstruction = (
    "You are validating exactly one Python sample.\n"
-    "Analyze the sample code and execute it. Based on the execution result, determine if it "
-    "runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
+    "Analyze the sample code and execute it as it is. Based on the execution result, determine "
+    "if it runs successfully, fails, or is missing_setup. Use `missing_setup` if the sample reports "
+    "missing required environment variables. The environment you're given should contain the necessary "
+    "variables. Don't create new environment variables nor modify the sample code.\n"
+    "Feel free to install any required dependencies if needed.\n"
    "The sample can be interactive. If it is interactive, respond to the sample when prompted "
    "based on your analysis of the code. You do not need to consult human on what to respond.\n"
+    "If the sample fails, investigate the error and suggest a fix.\n"
    "Return ONLY valid JSON with this schema:\n"
    "{\n"
-    '  "status": "success|failure|timeout|error",\n'
+    '  "status": "success|failure|missing_setup",\n'
    '  "output": "short summary of the result and what you did if the sample was interactive",\n'
-    '  "error": "error details or empty string"\n'
+    '  "error": "error details or empty string",\n'
+    '  "fix": "suggested code fix if the sample failed, otherwise empty string"\n'
    "}\n\n"
 )

@@ -87,16 +94,15 @@ def status_from_text(value: str) -> RunStatus:
    for status in RunStatus:
        if status.value == normalized:
            return status
-    return RunStatus.ERROR
+    return RunStatus.FAILURE


 def prompt_permission(
    request: PermissionRequest, context: dict[str, str]
 ) -> PermissionRequestResult:
    """Permission handler that always approves."""
-    kind = request.get("kind", "unknown")
    logger.debug(
-        f"[Permission Request: {kind}] ({context})Automatically approved for sample validation."
+        f"[Permission Request: {request.kind}] ({context})Automatically approved for sample validation."
    )
    return PermissionRequestResult(kind="approved")

@@ -108,39 +114,73 @@ class CustomAgentExecutor(Executor):
    returned as error responses, otherwise an exception in one agent could crash the entire workflow.
    """

+    # Retry in case GitHub Copilot agent encounters transient errors unrelated to the sample execution.
+    RETRY_COUNT = 1
+
    def __init__(self, agent: GitHubCopilotAgent):
        super().__init__(id=agent.id)
        self.agent = agent
+        self._session = agent.create_session()

    @handler
    async def handle_task(
        self, sample: SampleInfo, ctx: WorkflowContext[WorkerFreed | RunResult]
    ) -> None:
        """Execute one sample task and notify collector + coordinator."""
-        try:
-            response = await self.agent.run(
-                [
-                    Message(
-                        role="user",
-                        text=f"Validate the following sample:\n\n{sample.relative_path}",
+        current_retry = 0
+        while True:
+            try:
+                response = await self.agent.run(
+                    [
+                        Message(
+                            role="user",
+                            text=f"Validate the following sample:\n\n{sample.relative_path}",
+                        )
+                    ],
+                    session=self._session,
+                )
+                result_payload = parse_agent_json(response.text)
+                result = RunResult(
+                    sample=sample,
+                    status=status_from_text(result_payload.status),
+                    output=result_payload.output,
+                    error=result_payload.error,
+                    fix=result_payload.fix,
+                )
+                break
+            except Exception as ex:
+                if current_retry < self.RETRY_COUNT:
+                    logger.warning(
+                        f"Error executing agent {self.agent.id} (attempt {current_retry + 1}/{self.RETRY_COUNT}): {ex}. Retrying..."
                    )
-                ]
-            )
-            result_payload = parse_agent_json(response.text)
-            result = RunResult(
-                sample=sample,
-                status=status_from_text(result_payload.status),
-                output=result_payload.output,
-                error=result_payload.error,
-            )
-        except Exception as ex:
-            logger.error(f"Error executing agent {self.agent.id}: {ex}")
-            result = RunResult(
-                sample=sample,
-                status=RunStatus.ERROR,
-                output="",
-                error=str(ex),
-            )
+                    try:
+                        current_retry += 1
+                        await self.agent.stop()
+                        await self.agent.start()
+                        self._session = self.agent.create_session()  # Reset session for retry
+                        continue
+                    except Exception as restart_ex:
+                        logger.error(
+                            f"Error restarting agent {self.agent.id}: {restart_ex}. No more retries."
+                        )
+                        result = RunResult(
+                            sample=sample,
+                            status=RunStatus.FAILURE,
+                            output="",
+                            error=f"Original error: {ex}. Restart error: {restart_ex}",
+                            fix="",
+                        )
+                        break
+
+                logger.error(f"Error executing agent {self.agent.id}: {ex}")
+                result = RunResult(
+                    sample=sample,
+                    status=RunStatus.FAILURE,
+                    output="",
+                    error=str(ex),
+                    fix="",
+                )
+                break

        await ctx.send_message(result, target_id="collector")
        await ctx.send_message(WorkerFreed(worker_id=self.id), target_id="coordinator")
@@ -252,7 +292,7 @@ class CreateConcurrentValidationWorkflowExecutor(Executor):
                instructions=AgentInstruction,
                default_options={
                    "on_permission_request": prompt_permission,
-                    "timeout": 180,
+                    "timeout": 60,
                },  # type: ignore
            )
            agents.append(agent)
@@ -52,13 +52,18 @@ def _has_main_entrypoint_guard(path: Path) -> bool:
    )


-def discover_samples(samples_dir: Path, subdir: str | None = None) -> list[SampleInfo]:
+def discover_samples(
+    samples_dir: Path,
+    subdir: str | None = None,
+    exclude: list[str] | None = None,
+) -> list[SampleInfo]:
    """
    Find all Python sample files in the samples directory.

    Args:
        samples_dir: Root samples directory
        subdir: Optional subdirectory to filter to
+        exclude: Optional list of subdirectory paths (relative to the search directory) to exclude

    Returns:
        List of SampleInfo objects for each discovered sample
@@ -72,12 +77,21 @@ def discover_samples(samples_dir: Path, subdir: str | None = None) -> list[Sampl
    else:
        search_dir = samples_dir

+    # Resolve excluded paths to absolute for reliable comparison
+    exclude_paths = {(search_dir / exc).resolve() for exc in (exclude or [])}
+
    python_files: list[Path] = []

    # Walk through all subdirectories and find .py files
    for root, dirs, files in os.walk(search_dir):
-        # Skip directories that start with _ (like _sample_validation)
-        dirs[:] = [d for d in dirs if not d.startswith("_") and d != "__pycache__"]
+        # Skip directories that start with _, __pycache__, or excluded paths
+        dirs[:] = [
+            d
+            for d in dirs
+            if not d.startswith("_")
+            and d != "__pycache__"
+            and (Path(root) / d).resolve() not in exclude_paths
+        ]

        for file in files:
            # Skip files that start with _ and include only scripts with a main entrypoint guard
@@ -113,8 +127,10 @@ class DiscoverSamplesExecutor(Executor):
        print(f"🔍 Discovering samples in {self.config.samples_dir}")
        if self.config.subdir:
            print(f"   Filtering to subdirectory: {self.config.subdir}")
+        if self.config.exclude:
+            print(f"   Excluding: {', '.join(self.config.exclude)}")

-        samples = discover_samples(self.config.samples_dir, self.config.subdir)
+        samples = discover_samples(self.config.samples_dir, self.config.subdir, self.config.exclude)
        print(f"   Found {len(samples)} samples")

        await ctx.send_message(DiscoveryResult(samples=samples))
@@ -18,6 +18,7 @@ class ValidationConfig:
    samples_dir: Path
    python_root: Path
    subdir: str | None = None
+    exclude: list[str] | None = None
    max_parallel_workers: int = 10


@@ -60,8 +61,7 @@ class RunStatus(Enum):

    SUCCESS = "success"
    FAILURE = "failure"
-    TIMEOUT = "timeout"
-    ERROR = "error"
+    MISSING_SETUP = "missing_setup"


@dataclass
@@ -72,6 +72,7 @@ class RunResult:
    status: RunStatus
    output: str
    error: str
+    fix: str


@dataclass
@@ -89,8 +90,7 @@ class Report:
    total_samples: int
    success_count: int
    failure_count: int
-    timeout_count: int
-    error_count: int
+    missing_setup_count: int
    results: list[RunResult] = field(default_factory=list)  # type: ignore

    def to_markdown(self) -> str:
@@ -107,15 +107,14 @@ class Report:
            f"| Total Samples | {self.total_samples} |",
            f"| [PASS] Success | {self.success_count} |",
            f"| [FAIL] Failure | {self.failure_count} |",
-            f"| [TIMEOUT] Timeout | {self.timeout_count} |",
-            f"| [ERROR] Error | {self.error_count} |",
+            f"| [MISSING_SETUP] Missing Setup | {self.missing_setup_count} |",
            "",
            "## Detailed Results",
            "",
        ]

        # Group by status
-        for status in [RunStatus.FAILURE, RunStatus.TIMEOUT, RunStatus.ERROR, RunStatus.SUCCESS]:
+        for status in [RunStatus.FAILURE, RunStatus.MISSING_SETUP, RunStatus.SUCCESS]:
            status_results = [r for r in self.results if r.status == status]
            if not status_results:
                continue
@@ -123,8 +122,7 @@ class Report:
            status_label = {
                RunStatus.SUCCESS: "[PASS]",
                RunStatus.FAILURE: "[FAIL]",
-                RunStatus.TIMEOUT: "[TIMEOUT]",
-                RunStatus.ERROR: "[ERROR]",
+                RunStatus.MISSING_SETUP: "[MISSING_SETUP]",
            }

            lines.append(f"### {status_label[status]} {status.value.title()} ({len(status_results)})")
@@ -148,8 +146,7 @@ class Report:
                "total_samples": self.total_samples,
                "success_count": self.success_count,
                "failure_count": self.failure_count,
-                "timeout_count": self.timeout_count,
-                "error_count": self.error_count,
+                "missing_setup_count": self.missing_setup_count,
            },
            "results": [
                {
@@ -157,6 +154,7 @@ class Report:
                    "status": r.status.value,
                    "output": r.output,
                    "error": r.error,
+                    "fix": r.fix,
                }
                for r in self.results
            ],
@@ -22,12 +22,11 @@ def generate_report(results: list[RunResult]) -> Report:
    Returns:
        Report object with aggregated statistics
    """
-    # Sort results: failures, timeouts, errors first, then successes
+    # Sort results: failures, missing setup first, then successes
    status_priority = {
        RunStatus.FAILURE: 0,
-        RunStatus.TIMEOUT: 1,
-        RunStatus.ERROR: 2,
-        RunStatus.SUCCESS: 3,
+        RunStatus.MISSING_SETUP: 1,
+        RunStatus.SUCCESS: 2,
    }
    sorted_results = sorted(results, key=lambda r: status_priority[r.status])

@@ -36,8 +35,7 @@ def generate_report(results: list[RunResult]) -> Report:
        total_samples=len(results),
        success_count=sum(1 for r in results if r.status == RunStatus.SUCCESS),
        failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
-        timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
-        error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
+        missing_setup_count=sum(1 for r in results if r.status == RunStatus.MISSING_SETUP),
        results=sorted_results,
    )

@@ -86,8 +84,7 @@ def print_summary(report: Report) -> None:

    if (
        report.failure_count == 0
-        and report.timeout_count == 0
-        and report.error_count == 0
+        and report.missing_setup_count == 0
    ):
        print("[PASS] ALL SAMPLES PASSED!")
    else:
@@ -98,8 +95,7 @@ def print_summary(report: Report) -> None:
    print("Results:")
    print(f"  [PASS] Success: {report.success_count}")
    print(f"  [FAIL] Failure: {report.failure_count}")
-    print(f"  [TIMEOUT] Timeout: {report.timeout_count}")
-    print(f"  [ERR] Errors: {report.error_count}")
+    print(f"  [MISSING_SETUP] Missing Setup: {report.missing_setup_count}")
    print("=" * 80)

    # Print JSON output for GitHub Actions visibility
@@ -66,9 +66,10 @@ class RunDynamicValidationWorkflowExecutor(Executor):
                fallback_results = [
                    RunResult(
                        sample=sample,
-                        status=RunStatus.ERROR,
+                        status=RunStatus.FAILURE,
                        output="",
                        error="Nested workflow did not return an ExecutionResult.",
+                        fix="",
                    )
                    for sample in creation.samples
                ]