Python: Tuning auto sample validation workflow (#4218)

* Tuning validate-01-get-started

* Add gh token

* Add model

* enable debug log

* bump up timeout for testing purposes

* Test cli is working

* Fix end quote

* Run gh auth

* Run gh auth trial 2

* Run gh auth trial 3

* Test token

* Add zcure login

* Add zcure login 2

* Add zcure login 3

* Add zcure login 4

* Extract common actions

* Extract common actions 2

* Correct env vars

* Print outputs to action console

* Disable end-to-end samples

* Fix ruff errors

* Fix ruff errors 2

* Revert workflow changes to fix tests

* Revert workflow changes to fix tests 2

* Revert workflow changes to fix tests 3

* Revert workflow changes to fix tests 4
This commit is contained in:
Tao Chen
2026-02-26 18:45:10 -08:00
committed by GitHub
Unverified
parent 54c0bea3b6
commit c45d47d4b2
9 changed files with 171 additions and 112 deletions
@@ -53,9 +53,10 @@ class BatchCompletion:
AgentInstruction = (
"You are validating exactly one Python sample.\n"
"Analyze the sample code and execute it. Determine if it runs successfully, fails, or times out.\n"
"Analyze the sample code and execute it. Based on the execution result, determine if it "
"runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
"The sample can be interactive. If it is interactive, respond to the sample when prompted "
"based on your analysis of the code. You do not need to consult human on what to respond\n"
"based on your analysis of the code. You do not need to consult human on what to respond.\n"
"Return ONLY valid JSON with this schema:\n"
"{\n"
' "status": "success|failure|timeout|error",\n'
+14 -2
View File
@@ -21,6 +21,14 @@ def generate_report(results: list[RunResult]) -> Report:
Returns:
Report object with aggregated statistics
"""
# Sort results: failures, timeouts, errors first, then successes
status_priority = {
RunStatus.FAILURE: 0,
RunStatus.TIMEOUT: 1,
RunStatus.ERROR: 2,
RunStatus.SUCCESS: 3,
}
sorted_results = sorted(results, key=lambda r: status_priority[r.status])
return Report(
timestamp=datetime.now(),
@@ -29,7 +37,7 @@ def generate_report(results: list[RunResult]) -> Report:
failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
results=results,
results=sorted_results,
)
@@ -84,9 +92,13 @@ def print_summary(report: Report) -> None:
print(f" [PASS] Success: {report.success_count}")
print(f" [FAIL] Failure: {report.failure_count}")
print(f" [TIMEOUT] Timeout: {report.timeout_count}")
print(f" [ERROR] Error: {report.error_count}")
print(f" [ERR] Errors: {report.error_count}")
print("=" * 80)
# Print JSON output for GitHub Actions visibility
print("\nJSON Report:")
print(json.dumps(report.to_dict(), indent=2))
class GenerateReportExecutor(Executor):
"""Executor that generates the final validation report."""