Python: Tuning auto sample validation workflow (#4218)

* Tuning validate-01-get-started * Add gh token * Add model * enable debug log * bump up timeout for testing purposes * Test cli is working * Fix end quote * Run gh auth * Run gh auth trial 2 * Run gh auth trial 3 * Test token * Add zcure login * Add zcure login 2 * Add zcure login 3 * Add zcure login 4 * Extract common actions * Extract common actions 2 * Correct env vars * Print outputs to action console * Disable end-to-end samples * Fix ruff errors * Fix ruff errors 2 * Revert workflow changes to fix tests * Revert workflow changes to fix tests 2 * Revert workflow changes to fix tests 3 * Revert workflow changes to fix tests 4
2026-06-16 21:04:09 +08:00 · 2026-02-26 18:45:10 -08:00
parent 54c0bea3b6
commit c45d47d4b2
9 changed files with 171 additions and 112 deletions
@@ -612,11 +612,11 @@ class AgentFunctionApp(DFAppBase):
            context: Durable Functions orchestration context invoking the agent.
            agent_name: Name of the agent registered on this app.

-        Raises:
-            ValueError: If the requested agent has not been registered.
-
        Returns:
            DurableAIAgent[AgentTask] wrapper bound to the orchestration context.
+
+        Raises:
+            ValueError: If the requested agent has not been registered.
        """
        normalized_name = str(agent_name)

@@ -93,13 +93,13 @@ def detect_media_type_from_base64(
            This will look at the actual data to determine the media_type and not at the URI prefix.
            Will also not compare those two values.

-    Raises:
-        ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
-
    Returns:
        The detected media type (e.g., 'image/png', 'audio/wav', 'application/pdf')
        or None if the format is not recognized.

+    Raises:
+        ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
+
    Examples:
        .. code-block:: python

@@ -670,6 +670,9 @@ class Content:
            additional_properties: Optional additional properties.
            raw_representation: Optional raw representation from an underlying implementation.

+        Returns:
+            A Content instance with type="data" for data URIs or type="uri" for external URIs.
+
        Raises:
            ContentError: If the URI is not valid.

@@ -693,9 +696,6 @@ class Content:
                        raw_base64_string
                    }"
                )
-
-        Returns:
-            A Content instance with type="data" for data URIs or type="uri" for external URIs.
        """
        return cls(
            **_validate_uri(uri, media_type),
@@ -374,7 +374,6 @@ class Workflow(DictConvertible):
                        with _framework_event_origin():
                            pending_status = WorkflowEvent.status(WorkflowRunState.IN_PROGRESS_PENDING_REQUESTS)
                        yield pending_status
-
                # Workflow runs until idle - emit final status based on whether requests are pending
                if saw_request:
                    with _framework_event_origin():
@@ -92,8 +92,7 @@ class DeploymentManager:
                        break

                    # Get event from queue with short timeout
-                    event = await asyncio.wait_for(event_queue.get(), timeout=0.1)
-                    yield event
+                    yield await asyncio.wait_for(event_queue.get(), timeout=0.1)
                except asyncio.TimeoutError:
                    # No event in queue, continue waiting
                    continue
@@ -148,6 +148,8 @@ ignore = [
 "**/tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"]
 "samples/**" = ["D", "INP", "ERA001", "RUF", "S", "T201", "CPY"]
 "*.ipynb" = ["CPY", "E501"]
+# RUF070: Assignment before yield is intentional - context manager must exit before yielding
+"**/agent_framework/_workflows/_workflow.py" = ["RUF070"]

 [tool.ruff.format]
 docstring-code-format = true
@@ -53,9 +53,10 @@ class BatchCompletion:

 AgentInstruction = (
    "You are validating exactly one Python sample.\n"
-    "Analyze the sample code and execute it. Determine if it runs successfully, fails, or times out.\n"
+    "Analyze the sample code and execute it. Based on the execution result, determine if it "
+    "runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
    "The sample can be interactive. If it is interactive, respond to the sample when prompted "
-    "based on your analysis of the code. You do not need to consult human on what to respond\n"
+    "based on your analysis of the code. You do not need to consult human on what to respond.\n"
    "Return ONLY valid JSON with this schema:\n"
    "{\n"
    '  "status": "success|failure|timeout|error",\n'
@@ -21,6 +21,14 @@ def generate_report(results: list[RunResult]) -> Report:
    Returns:
        Report object with aggregated statistics
    """
+    # Sort results: failures, timeouts, errors first, then successes
+    status_priority = {
+        RunStatus.FAILURE: 0,
+        RunStatus.TIMEOUT: 1,
+        RunStatus.ERROR: 2,
+        RunStatus.SUCCESS: 3,
+    }
+    sorted_results = sorted(results, key=lambda r: status_priority[r.status])

    return Report(
        timestamp=datetime.now(),
@@ -29,7 +37,7 @@ def generate_report(results: list[RunResult]) -> Report:
        failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
        timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
        error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
-        results=results,
+        results=sorted_results,
    )


@@ -84,9 +92,13 @@ def print_summary(report: Report) -> None:
    print(f"  [PASS] Success: {report.success_count}")
    print(f"  [FAIL] Failure: {report.failure_count}")
    print(f"  [TIMEOUT] Timeout: {report.timeout_count}")
-    print(f"  [ERROR] Error: {report.error_count}")
+    print(f"  [ERR] Errors: {report.error_count}")
    print("=" * 80)

+    # Print JSON output for GitHub Actions visibility
+    print("\nJSON Report:")
+    print(json.dumps(report.to_dict(), indent=2))
+

 class GenerateReportExecutor(Executor):
    """Executor that generates the final validation report."""