Python: Tuning auto sample validation workflow (#4218)

* Tuning validate-01-get-started

* Add gh token

* Add model

* Enable debug log

* Bump up timeout for testing purposes

* Test cli is working

* Fix end quote

* Run gh auth

* Run gh auth trial 2

* Run gh auth trial 3

* Test token

* Add azure login

* Add azure login 2

* Add azure login 3

* Add azure login 4

* Extract common actions

* Extract common actions 2

* Correct env vars

* Print outputs to action console

* Disable end-to-end samples

* Fix ruff errors

* Fix ruff errors 2

* Revert workflow changes to fix tests

* Revert workflow changes to fix tests 2

* Revert workflow changes to fix tests 3

* Revert workflow changes to fix tests 4
This commit is contained in:
Tao Chen
2026-02-26 18:45:10 -08:00
committed by GitHub
Unverified
parent 54c0bea3b6
commit c45d47d4b2
9 changed files with 171 additions and 112 deletions
@@ -612,11 +612,11 @@ class AgentFunctionApp(DFAppBase):
context: Durable Functions orchestration context invoking the agent.
agent_name: Name of the agent registered on this app.
Raises:
ValueError: If the requested agent has not been registered.
Returns:
DurableAIAgent[AgentTask] wrapper bound to the orchestration context.
Raises:
ValueError: If the requested agent has not been registered.
"""
normalized_name = str(agent_name)
@@ -93,13 +93,13 @@ def detect_media_type_from_base64(
This will look at the actual data to determine the media_type and not at the URI prefix.
Will also not compare those two values.
Raises:
ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
Returns:
The detected media type (e.g., 'image/png', 'audio/wav', 'application/pdf')
or None if the format is not recognized.
Raises:
ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
Examples:
.. code-block:: python
@@ -670,6 +670,9 @@ class Content:
additional_properties: Optional additional properties.
raw_representation: Optional raw representation from an underlying implementation.
Returns:
A Content instance with type="data" for data URIs or type="uri" for external URIs.
Raises:
ContentError: If the URI is not valid.
@@ -693,9 +696,6 @@ class Content:
raw_base64_string
}"
)
Returns:
A Content instance with type="data" for data URIs or type="uri" for external URIs.
"""
return cls(
**_validate_uri(uri, media_type),
@@ -374,7 +374,6 @@ class Workflow(DictConvertible):
with _framework_event_origin():
pending_status = WorkflowEvent.status(WorkflowRunState.IN_PROGRESS_PENDING_REQUESTS)
yield pending_status
# Workflow runs until idle - emit final status based on whether requests are pending
if saw_request:
with _framework_event_origin():
@@ -92,8 +92,7 @@ class DeploymentManager:
break
# Get event from queue with short timeout
event = await asyncio.wait_for(event_queue.get(), timeout=0.1)
yield event
yield await asyncio.wait_for(event_queue.get(), timeout=0.1)
except asyncio.TimeoutError:
# No event in queue, continue waiting
continue
+2
View File
@@ -148,6 +148,8 @@ ignore = [
"**/tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"]
"samples/**" = ["D", "INP", "ERA001", "RUF", "S", "T201", "CPY"]
"*.ipynb" = ["CPY", "E501"]
# RUF070: Assignment before yield is intentional - context manager must exit before yielding
"**/agent_framework/_workflows/_workflow.py" = ["RUF070"]
[tool.ruff.format]
docstring-code-format = true
@@ -53,9 +53,10 @@ class BatchCompletion:
AgentInstruction = (
"You are validating exactly one Python sample.\n"
"Analyze the sample code and execute it. Determine if it runs successfully, fails, or times out.\n"
"Analyze the sample code and execute it. Based on the execution result, determine if it "
"runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
"The sample can be interactive. If it is interactive, respond to the sample when prompted "
"based on your analysis of the code. You do not need to consult human on what to respond\n"
"based on your analysis of the code. You do not need to consult human on what to respond.\n"
"Return ONLY valid JSON with this schema:\n"
"{\n"
' "status": "success|failure|timeout|error",\n'
+14 -2
View File
@@ -21,6 +21,14 @@ def generate_report(results: list[RunResult]) -> Report:
Returns:
Report object with aggregated statistics
"""
# Sort results: failures, timeouts, errors first, then successes
status_priority = {
RunStatus.FAILURE: 0,
RunStatus.TIMEOUT: 1,
RunStatus.ERROR: 2,
RunStatus.SUCCESS: 3,
}
sorted_results = sorted(results, key=lambda r: status_priority[r.status])
return Report(
timestamp=datetime.now(),
@@ -29,7 +37,7 @@ def generate_report(results: list[RunResult]) -> Report:
failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
results=results,
results=sorted_results,
)
@@ -84,9 +92,13 @@ def print_summary(report: Report) -> None:
print(f" [PASS] Success: {report.success_count}")
print(f" [FAIL] Failure: {report.failure_count}")
print(f" [TIMEOUT] Timeout: {report.timeout_count}")
print(f" [ERROR] Error: {report.error_count}")
print(f" [ERR] Errors: {report.error_count}")
print("=" * 80)
# Print JSON output for GitHub Actions visibility
print("\nJSON Report:")
print(json.dumps(report.to_dict(), indent=2))
class GenerateReportExecutor(Executor):
"""Executor that generates the final validation report."""