mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Tuning auto sample validation workflow (#4218)
* Tuning validate-01-get-started
* Add gh token
* Add model
* Enable debug log
* Bump up timeout for testing purposes
* Test cli is working
* Fix end quote
* Run gh auth
* Run gh auth trial 2
* Run gh auth trial 3
* Test token
* Add azure login
* Add azure login 2
* Add azure login 3
* Add azure login 4
* Extract common actions
* Extract common actions 2
* Correct env vars
* Print outputs to action console
* Disable end-to-end samples
* Fix ruff errors
* Fix ruff errors 2
* Revert workflow changes to fix tests
* Revert workflow changes to fix tests 2
* Revert workflow changes to fix tests 3
* Revert workflow changes to fix tests 4
This commit is contained in:
committed by
GitHub
Unverified
parent
54c0bea3b6
commit
c45d47d4b2
@@ -612,11 +612,11 @@ class AgentFunctionApp(DFAppBase):
|
||||
context: Durable Functions orchestration context invoking the agent.
|
||||
agent_name: Name of the agent registered on this app.
|
||||
|
||||
Raises:
|
||||
ValueError: If the requested agent has not been registered.
|
||||
|
||||
Returns:
|
||||
DurableAIAgent[AgentTask] wrapper bound to the orchestration context.
|
||||
|
||||
Raises:
|
||||
ValueError: If the requested agent has not been registered.
|
||||
"""
|
||||
normalized_name = str(agent_name)
|
||||
|
||||
|
||||
@@ -93,13 +93,13 @@ def detect_media_type_from_base64(
|
||||
This will look at the actual data to determine the media_type and not at the URI prefix.
|
||||
Will also not compare those two values.
|
||||
|
||||
Raises:
|
||||
ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
|
||||
|
||||
Returns:
|
||||
The detected media type (e.g., 'image/png', 'audio/wav', 'application/pdf')
|
||||
or None if the format is not recognized.
|
||||
|
||||
Raises:
|
||||
ValueError: If not exactly 1 of data_bytes, data_str, or data_uri is provided, or if base64 decoding fails.
|
||||
|
||||
Examples:
|
||||
.. code-block:: python
|
||||
|
||||
@@ -670,6 +670,9 @@ class Content:
|
||||
additional_properties: Optional additional properties.
|
||||
raw_representation: Optional raw representation from an underlying implementation.
|
||||
|
||||
Returns:
|
||||
A Content instance with type="data" for data URIs or type="uri" for external URIs.
|
||||
|
||||
Raises:
|
||||
ContentError: If the URI is not valid.
|
||||
|
||||
@@ -693,9 +696,6 @@ class Content:
|
||||
raw_base64_string
|
||||
}"
|
||||
)
|
||||
|
||||
Returns:
|
||||
A Content instance with type="data" for data URIs or type="uri" for external URIs.
|
||||
"""
|
||||
return cls(
|
||||
**_validate_uri(uri, media_type),
|
||||
|
||||
@@ -374,7 +374,6 @@ class Workflow(DictConvertible):
|
||||
with _framework_event_origin():
|
||||
pending_status = WorkflowEvent.status(WorkflowRunState.IN_PROGRESS_PENDING_REQUESTS)
|
||||
yield pending_status
|
||||
|
||||
# Workflow runs until idle - emit final status based on whether requests are pending
|
||||
if saw_request:
|
||||
with _framework_event_origin():
|
||||
|
||||
@@ -92,8 +92,7 @@ class DeploymentManager:
|
||||
break
|
||||
|
||||
# Get event from queue with short timeout
|
||||
event = await asyncio.wait_for(event_queue.get(), timeout=0.1)
|
||||
yield event
|
||||
yield await asyncio.wait_for(event_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
# No event in queue, continue waiting
|
||||
continue
|
||||
|
||||
@@ -148,6 +148,8 @@ ignore = [
|
||||
"**/tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"]
|
||||
"samples/**" = ["D", "INP", "ERA001", "RUF", "S", "T201", "CPY"]
|
||||
"*.ipynb" = ["CPY", "E501"]
|
||||
# RUF070: Assignment before yield is intentional - context manager must exit before yielding
|
||||
"**/agent_framework/_workflows/_workflow.py" = ["RUF070"]
|
||||
|
||||
[tool.ruff.format]
|
||||
docstring-code-format = true
|
||||
|
||||
@@ -53,9 +53,10 @@ class BatchCompletion:
|
||||
|
||||
AgentInstruction = (
|
||||
"You are validating exactly one Python sample.\n"
|
||||
"Analyze the sample code and execute it. Determine if it runs successfully, fails, or times out.\n"
|
||||
"Analyze the sample code and execute it. Based on the execution result, determine if it "
|
||||
"runs successfully, fails, or times out. Feel free to install any required dependencies.\n"
|
||||
"The sample can be interactive. If it is interactive, respond to the sample when prompted "
|
||||
"based on your analysis of the code. You do not need to consult human on what to respond\n"
|
||||
"based on your analysis of the code. You do not need to consult human on what to respond.\n"
|
||||
"Return ONLY valid JSON with this schema:\n"
|
||||
"{\n"
|
||||
' "status": "success|failure|timeout|error",\n'
|
||||
|
||||
@@ -21,6 +21,14 @@ def generate_report(results: list[RunResult]) -> Report:
|
||||
Returns:
|
||||
Report object with aggregated statistics
|
||||
"""
|
||||
# Sort results: failures, timeouts, errors first, then successes
|
||||
status_priority = {
|
||||
RunStatus.FAILURE: 0,
|
||||
RunStatus.TIMEOUT: 1,
|
||||
RunStatus.ERROR: 2,
|
||||
RunStatus.SUCCESS: 3,
|
||||
}
|
||||
sorted_results = sorted(results, key=lambda r: status_priority[r.status])
|
||||
|
||||
return Report(
|
||||
timestamp=datetime.now(),
|
||||
@@ -29,7 +37,7 @@ def generate_report(results: list[RunResult]) -> Report:
|
||||
failure_count=sum(1 for r in results if r.status == RunStatus.FAILURE),
|
||||
timeout_count=sum(1 for r in results if r.status == RunStatus.TIMEOUT),
|
||||
error_count=sum(1 for r in results if r.status == RunStatus.ERROR),
|
||||
results=results,
|
||||
results=sorted_results,
|
||||
)
|
||||
|
||||
|
||||
@@ -84,9 +92,13 @@ def print_summary(report: Report) -> None:
|
||||
print(f" [PASS] Success: {report.success_count}")
|
||||
print(f" [FAIL] Failure: {report.failure_count}")
|
||||
print(f" [TIMEOUT] Timeout: {report.timeout_count}")
|
||||
print(f" [ERROR] Error: {report.error_count}")
|
||||
print(f" [ERR] Errors: {report.error_count}")
|
||||
print("=" * 80)
|
||||
|
||||
# Print JSON output for GitHub Actions visibility
|
||||
print("\nJSON Report:")
|
||||
print(json.dumps(report.to_dict(), indent=2))
|
||||
|
||||
|
||||
class GenerateReportExecutor(Executor):
|
||||
"""Executor that generates the final validation report."""
|
||||
|
||||
Reference in New Issue
Block a user