mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
0cd40f8354
* [BREAKING] Refactor middleware layering and raw clients Reorder chat client layers so function invocation wraps chat middleware, and chat middleware stays outside telemetry while still running for each inner model call. Add middleware pipeline caching, refresh docs and samples, and split Anthropic into raw and public clients to match the standard layering model. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Tighten typing ignores in ancillary modules Add targeted typing ignores in workflow visualization and lab modules so pyright stays clean alongside the middleware refactor work. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix categorize_middleware to unpack tuple/Sequence and use relative MRO assertions - Broaden isinstance check in categorize_middleware from list to Sequence so tuples and other Sequence types are properly unpacked instead of being appended as a single item. - Replace fragile hardcoded MRO index assertions in anthropic test with relative ordering via mro.index(). - Add regression tests for categorize_middleware with tuple, list, and None inputs. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix middleware string decomposition, add middleware param to FunctionInvocationLayer, and add tests (#4710) - Guard categorize_middleware Sequence check against str/bytes to prevent character-by-character decomposition of accidentally passed strings - Add explicit middleware parameter to FunctionInvocationLayer.get_response and merge it into client_kwargs before categorization, fixing the inconsistency where only OpenAIChatClient supported this parameter - Add assertions that RawAnthropicClient does not inherit convenience layers - Add chat middleware cache test with non-empty base middleware - Add tests for single unwrapped middleware item and string input Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Apply pre-commit auto-fixes * Apply pre-commit auto-fixes * Address review feedback for #4710: review comment fixes --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Copilot <copilot@github.com>
114 lines
4.3 KiB
Python
114 lines
4.3 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
|
|
import asyncio
|
|
from random import randint
|
|
from typing import TYPE_CHECKING, Annotated
|
|
|
|
from agent_framework import Message, tool
|
|
from agent_framework.observability import get_tracer
|
|
from agent_framework.openai import OpenAIResponsesClient
|
|
from dotenv import load_dotenv
|
|
from opentelemetry.trace import SpanKind
|
|
from opentelemetry.trace.span import format_trace_id
|
|
from pydantic import Field
|
|
|
|
if TYPE_CHECKING:
|
|
from agent_framework import SupportsChatGetResponse
|
|
|
|
|
|
"""
|
|
This sample shows how you can configure observability of an application with zero code changes.
|
|
It relies on the OpenTelemetry auto-instrumentation capabilities, and the observability setup
|
|
is done via environment variables.
|
|
|
|
Follow the install guidance from https://opentelemetry.io/docs/zero-code/python/ to install the OpenTelemetry CLI tool.
When using `uv` there are some additional steps, so follow the instructions carefully.
|
|
|
|
Also set up a local OpenTelemetry Collector instance to receive the traces and metrics (and update the endpoint below).
|
|
|
|
Then you can run:
|
|
```bash
|
|
opentelemetry-instrument \
|
|
--traces_exporter otlp \
|
|
--metrics_exporter otlp \
|
|
--service_name agent_framework \
|
|
--exporter_otlp_endpoint http://localhost:4317 \
|
|
python python/samples/02-agents/observability/advanced_zero_code.py
|
|
```
|
|
(or prefix the command with `uv run` if you did the install within your uv virtual environment)
|
|
|
|
You can also set the environment variables instead of passing them as CLI arguments.
|
|
|
|
"""
|
|
|
|
# Load environment variables from a .env file at import time,
# i.e. before main() creates the client.
load_dotenv()
|
|
|
|
|
|
# NOTE: approval_mode="never_require" is for sample brevity.
|
|
# Use "always_require" in production; see samples/02-agents/tools/function_tool_with_approval.py
|
|
# and samples/02-agents/tools/function_tool_with_approval_and_sessions.py.
|
|
@tool(approval_mode="never_require")
async def get_weather(
    location: Annotated[str, Field(description="The location to get the weather for.")],
) -> str:
    """Get the weather for a given location."""
    # Simulate a network call with a random delay between 0.0 and 1.0 seconds.
    delay_seconds = randint(0, 10) / 10.0
    await asyncio.sleep(delay_seconds)

    # Pick the condition first and the temperature second so the random
    # draws happen in the same order as the values appear in the report.
    conditions = ("sunny", "cloudy", "rainy", "stormy")
    condition = conditions[randint(0, 3)]
    high = randint(10, 30)
    return f"The weather in {location} is {condition} with a high of {high}°C."
|
|
|
|
|
|
async def run_chat_client(client: "SupportsChatGetResponse", stream: bool = False) -> None:
    """Ask a chat client a weather question and print its answer.

    Telemetry for the call is collected behind the scenes and the traces are
    sent to the configured telemetry backend; this function performs no
    observability setup of its own.

    Args:
        client: The chat client whose `get_response` is called.
        stream: When True, request a streaming response and print each text
            chunk as it arrives; otherwise await the full response and print it.

    Remarks:
        When `FunctionInvocationLayer` is outside `ChatTelemetryLayer`,
        each call to the model is handled as a separate span.
        If `ChatMiddlewareLayer` is present, keep it outside telemetry
        so middleware latency does not skew those timings.
        By contrast, when telemetry is placed outside the function loop,
        a single span can cover one or more rounds of function calling.

        So for the scenario below, you should see the following:

        2 spans with gen_ai.operation.name=chat
            The first has finish_reason "tool_calls"
            The second has finish_reason "stop"
        2 spans with gen_ai.operation.name=execute_tool
    """
    message = "What's the weather in Amsterdam and in Paris?"
    print(f"User: {message}")

    # Non-streaming path: a single awaited call, then done.
    if not stream:
        response = await client.get_response([Message(role="user", text=message)], tools=get_weather)
        print(f"Assistant: {response}")
        return

    # Streaming path: print the prefix once, then append text as updates arrive.
    print("Assistant: ", end="")
    updates = client.get_response([Message(role="user", text=message)], tools=get_weather, stream=True)
    async for update in updates:
        if update.text:
            print(update.text, end="")
    # Terminate the line that the streamed chunks were written on.
    print("")
|
|
|
|
|
|
async def main() -> None:
    """Run the chat sample twice, streaming first and then non-streaming, under one parent span.

    The trace ID of the parent span is printed up front so the run can be
    looked up in the telemetry backend.
    """
    tracer = get_tracer()
    with tracer.start_as_current_span("Zero Code", kind=SpanKind.CLIENT) as current_span:
        trace_id = format_trace_id(current_span.get_span_context().trace_id)
        print(f"Trace ID: {trace_id}")

        client = OpenAIResponsesClient()

        # Same client, both modes: streaming run first, then the non-streaming run.
        for use_streaming in (True, False):
            await run_chat_client(client, stream=use_streaming)
|
|
|
|
|
|
# Script entry point: only run the sample when this file is executed directly
# (for example via `opentelemetry-instrument python ...`), not when imported.
if __name__ == "__main__":
    asyncio.run(main())
|