mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Tuning auto sample validation workflow (#4218)
* Tuning validate-01-get-started * Add gh token * Add model * enable debug log * bump up timeout for testing purposes * Test cli is working * Fix end quote * Run gh auth * Run gh auth trial 2 * Run gh auth trial 3 * Test token * Add Azure login * Add Azure login 2 * Add Azure login 3 * Add Azure login 4 * Extract common actions * Extract common actions 2 * Correct env vars * Print outputs to action console * Disable end-to-end samples * Fix ruff errors * Fix ruff errors 2 * Revert workflow changes to fix tests * Revert workflow changes to fix tests 2 * Revert workflow changes to fix tests 3 * Revert workflow changes to fix tests 4
This commit is contained in:
committed by
GitHub
Unverified
parent
54c0bea3b6
commit
c45d47d4b2
@@ -53,9 +53,10 @@ class BatchCompletion:
|
||||
|
||||
AgentInstruction = (
|
||||
"You are validating exactly one Python sample.\n"
|
||||
"Analyze the sample code and execute it. Determine if it runs successfully, fails, or times out.\n"
|
||||
"Analyze the sample code and execute it. Based on the execution result, determine if it "
|
||||
"runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
|
||||
"The sample can be interactive. If it is interactive, respond to the sample when prompted "
|
||||
"based on your analysis of the code. You do not need to consult human on what to respond\n"
|
||||
"based on your analysis of the code. You do not need to consult human on what to respond.\n"
|
||||
"Return ONLY valid JSON with this schema:\n"
|
||||
"{\n"
|
||||
' "status": "success|failure|timeout|error",\n'
|
||||
|
||||
@@ -21,6 +21,14 @@ def generate_report(results: list[RunResult]) -> Report:
|
||||
Returns:
|
||||
Report object with aggregated statistics
|
||||
"""
|
||||
# Sort results: failures, timeouts, errors first, then successes
|
||||
status_priority = {
|
||||
RunStatus.FAILURE: 0,
|
||||
RunStatus.TIMEOUT: 1,
|
||||
RunStatus.ERROR: 2,
|
||||
RunStatus.SUCCESS: 3,
|
||||
}
|
||||
sorted_results = sorted(results, key=lambda r: status_priority[r.status])
|
||||
|
||||
return Report(
|
||||
timestamp=datetime.now(),
|
||||
@@ -29,7 +37,7 @@ def generate_report(results: list[RunResult]) -> Report:
|
||||
failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
|
||||
timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
|
||||
error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
|
||||
results=results,
|
||||
results=sorted_results,
|
||||
)
|
||||
|
||||
|
||||
@@ -84,9 +92,13 @@ def print_summary(report: Report) -> None:
|
||||
print(f" [PASS] Success: {report.success_count}")
|
||||
print(f" [FAIL] Failure: {report.failure_count}")
|
||||
print(f" [TIMEOUT] Timeout: {report.timeout_count}")
|
||||
print(f" [ERROR] Error: {report.error_count}")
|
||||
print(f" [ERR] Errors: {report.error_count}")
|
||||
print("=" * 80)
|
||||
|
||||
# Print JSON output for GitHub Actions visibility
|
||||
print("\nJSON Report:")
|
||||
print(json.dumps(report.to_dict(), indent=2))
|
||||
|
||||
|
||||
class GenerateReportExecutor(Executor):
|
||||
"""Executor that generates the final validation report."""
|
||||
|
||||
Reference in New Issue
Block a user