From db58a10a3756d9bea1c6db18cafb5b120daec322 Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Mon, 15 Sep 2025 20:28:01 +0200 Subject: [PATCH] Python: added args and results to function call span (#733) * added args and results to function call span * add ignores for mypy * improved serialization * also add None result * improved handling * log args when None * slight tweak * fix tests --- .../packages/main/agent_framework/_tools.py | 40 ++++++++----- .../main/agent_framework/telemetry.py | 28 ++++++--- .../main/tests/main/test_telemetry.py | 58 ++++++------------ python/packages/main/tests/main/test_tools.py | 59 ++++++++++++++----- python/packages/main/tests/main/utils.py | 12 ++++ .../telemetry/02a-generic_chat_client.py | 6 +- 6 files changed, 123 insertions(+), 80 deletions(-) create mode 100644 python/packages/main/tests/main/utils.py diff --git a/python/packages/main/agent_framework/_tools.py b/python/packages/main/agent_framework/_tools.py index b0751b2406..9825af985a 100644 --- a/python/packages/main/agent_framework/_tools.py +++ b/python/packages/main/agent_framework/_tools.py @@ -2,6 +2,7 @@ import asyncio import inspect +import json import sys from collections.abc import AsyncIterable, Awaitable, Callable, Collection, MutableMapping, Sequence from functools import wraps @@ -31,6 +32,7 @@ from .telemetry import ( OtelAttr, _capture_exception, # type: ignore get_function_span, + get_function_span_attributes, meter, ) @@ -407,7 +409,7 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]): if not isinstance(arguments, self.input_model): raise TypeError(f"Expected {self.input_model.__name__}, got {type(arguments).__name__}") kwargs = arguments.model_dump(exclude_none=True) - if not OTEL_SETTINGS.ENABLED: # type: ignore + if not OTEL_SETTINGS.ENABLED: # type: ignore[name-defined] logger.info(f"Function name: {self.name}") logger.debug(f"Function arguments: {kwargs}") res = self.__call__(**kwargs) @@ -417,16 +419,19 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]): return result # type: ignore[reportReturnType] setup_telemetry() - with get_function_span( - function=self, - tool_call_id=tool_call_id, - ) as span: - hist_attributes: dict[str, Any] = { - OtelAttr.MEASUREMENT_FUNCTION_TAG_NAME: self.name, - OtelAttr.TOOL_CALL_ID: tool_call_id or "unknown", - } + attributes = get_function_span_attributes(self, tool_call_id=tool_call_id) + if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined] + attributes.update({ + OtelAttr.TOOL_ARGUMENTS: arguments.model_dump_json() + if arguments + else json.dumps(kwargs) + if kwargs + else "None" + }) + with get_function_span(attributes=attributes) as span: + attributes[OtelAttr.MEASUREMENT_FUNCTION_TAG_NAME] = self.name logger.info(f"Function name: {self.name}") - if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore + if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined] logger.debug(f"Function arguments: {kwargs}") start_time_stamp = perf_counter() end_time_stamp: float | None = None @@ -436,19 +441,26 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]): end_time_stamp = perf_counter() except Exception as exception: end_time_stamp = perf_counter() - hist_attributes[OtelAttr.ERROR_TYPE] = type(exception).__name__ + attributes[OtelAttr.ERROR_TYPE] = type(exception).__name__ _capture_exception(span=span, exception=exception, timestamp=time_ns()) logger.error(f"Function failed. Error: {exception}") raise else: logger.info(f"Function {self.name} succeeded.") - if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore - logger.debug(f"Function result: {result or 'None'}") + if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined] + try: + json_result = json.dumps(result) + except (TypeError, OverflowError): + span.set_attribute(OtelAttr.TOOL_RESULT, "") + logger.debug("Function result: ") + else: + span.set_attribute(OtelAttr.TOOL_RESULT, json_result) + logger.debug(f"Function result: {json_result}") return result # type: ignore[reportReturnType] finally: duration = (end_time_stamp or perf_counter()) - start_time_stamp span.set_attribute(OtelAttr.MEASUREMENT_FUNCTION_INVOCATION_DURATION, duration) - self._invocation_duration_histogram.record(duration, attributes=hist_attributes) + self._invocation_duration_histogram.record(duration, attributes=attributes) logger.info("Function duration: %fs", duration) def parameters(self) -> dict[str, Any]: diff --git a/python/packages/main/agent_framework/telemetry.py b/python/packages/main/agent_framework/telemetry.py index 604bed1388..0c276ed2c7 100644 --- a/python/packages/main/agent_framework/telemetry.py +++ b/python/packages/main/agent_framework/telemetry.py @@ -203,6 +203,8 @@ class OtelAttr(str, Enum): TOOL_DESCRIPTION = "gen_ai.tool.description" TOOL_NAME = "gen_ai.tool.name" TOOL_TYPE = "gen_ai.tool.type" + TOOL_ARGUMENTS = "gen_ai.tool.call.arguments" + TOOL_RESULT = "gen_ai.tool.call.result" # Agent attributes AGENT_ID = "gen_ai.agent.id" # Client attributes @@ -885,18 +887,15 @@ def use_agent_telemetry( # region Otel Helpers -def get_function_span( - function: "AIFunction[Any, Any]", - tool_call_id: str | None = None, -) -> "_AgnosticContextManager[Span]": - """Starts a span for the given function. +def get_function_span_attributes(function: "AIFunction[Any, Any]", tool_call_id: str | None = None) -> dict[str, str]: + """Get the span attributes for the given function. Args: - function: The function for which to start the span. + function: The function for which to get the span attributes. tool_call_id: The id of the tool_call that was requested. Returns: - trace.Span: The started span as a context manager. + dict[str, str]: The span attributes. """ attributes: dict[str, str] = { OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, @@ -906,9 +905,22 @@ def get_function_span( } if function.description: attributes[OtelAttr.TOOL_DESCRIPTION] = function.description + return attributes + +def get_function_span( + attributes: dict[str, str], +) -> "_AgnosticContextManager[Span]": + """Starts a span for the given function. + + Args: + attributes: The span attributes. + + Returns: + trace.Span: The started span as a context manager. + """ return tracer.start_as_current_span( - name=f"{OtelAttr.TOOL_EXECUTION_OPERATION} {function.name}", + name=f"{attributes[OtelAttr.OPERATION]} {attributes[OtelAttr.TOOL_NAME]}", attributes=attributes, set_status_on_exception=False, end_on_exit=True, diff --git a/python/packages/main/tests/main/test_telemetry.py b/python/packages/main/tests/main/test_telemetry.py index 597961488e..c0aaf79be8 100644 --- a/python/packages/main/tests/main/test_telemetry.py +++ b/python/packages/main/tests/main/test_telemetry.py @@ -37,6 +37,8 @@ from agent_framework.telemetry import ( use_telemetry, ) +from .utils import CopyingMock + # region Test constants @@ -198,8 +200,14 @@ def test_start_span_basic(): mock_function = Mock() mock_function.name = "test_function" mock_function.description = "Test function description" + attributes = { + OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, + OtelAttr.TOOL_NAME: "test_function", + OtelAttr.TOOL_DESCRIPTION: "Test function description", + OtelAttr.TOOL_TYPE: "function", + } - result = get_function_span(mock_function) + result = get_function_span(attributes) assert result == mock_span mock_tracer.start_as_current_span.assert_called_once() @@ -217,7 +225,7 @@ def test_start_span_with_tool_call_id(): """Test starting a span with tool_call_id.""" mock_tracer = Mock() with patch("agent_framework.telemetry.tracer", mock_tracer): - mock_span = Mock() + mock_span = CopyingMock() mock_tracer.start_as_current_span.return_value = mock_span mock_function = Mock() @@ -225,32 +233,21 @@ def test_start_span_with_tool_call_id(): mock_function.description = "Test function" tool_call_id = "test_call_123" + attributes = { + OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, + OtelAttr.TOOL_NAME: "test_function", + OtelAttr.TOOL_DESCRIPTION: "Test function", + OtelAttr.TOOL_TYPE: "function", + OtelAttr.TOOL_CALL_ID: tool_call_id, + } - _ = get_function_span(mock_function, tool_call_id) + _ = get_function_span(attributes) call_args = mock_tracer.start_as_current_span.call_args attributes = call_args[1]["attributes"] assert attributes[OtelAttr.TOOL_CALL_ID] == "test_call_123" -def test_start_span_without_description(): - """Test starting a span when function has no description.""" - mock_tracer = Mock() - with patch("agent_framework.telemetry.tracer", mock_tracer): - mock_span = Mock() - mock_tracer.start_as_current_span.return_value = mock_span - - mock_function = Mock() - mock_function.name = "test_function" - mock_function.description = None - - get_function_span(mock_function) - - call_args = mock_tracer.start_as_current_span.call_args - attributes = call_args[1]["attributes"] - assert OtelAttr.TOOL_DESCRIPTION not in attributes - - # region Test use_telemetry decorator @@ -377,25 +374,6 @@ async def test_streaming_response_with_otel(mock_chat_client, otel_settings): mock_set_output.assert_called_once() -def test_start_as_current_span_with_none_metadata(): - """Test get_function_span with None metadata.""" - mock_tracer = Mock() - with patch("agent_framework.telemetry.tracer", mock_tracer): - mock_span = Mock() - mock_tracer.start_as_current_span.return_value = mock_span - - mock_function = Mock() - mock_function.name = "test_function" - mock_function.description = "Test description" - - result = get_function_span(mock_function, None) - - assert result == mock_span - call_args = mock_tracer.start_as_current_span.call_args - attributes = call_args[1]["attributes"] - assert attributes[OtelAttr.TOOL_CALL_ID] == "unknown" - - def test_prepend_user_agent_with_none_value(): """Test prepend user agent with None value in headers.""" headers = {"User-Agent": None} diff --git a/python/packages/main/tests/main/test_tools.py b/python/packages/main/tests/main/test_tools.py index 11daff6716..43e72f10a2 100644 --- a/python/packages/main/tests/main/test_tools.py +++ b/python/packages/main/tests/main/test_tools.py @@ -17,6 +17,8 @@ from agent_framework._tools import _parse_inputs from agent_framework.exceptions import ToolException from agent_framework.telemetry import OtelAttr +from .utils import CopyingMock + # region AIFunction and ai_function decorator tests @@ -83,14 +85,13 @@ async def test_ai_function_decorator_with_async(): assert (await async_test_tool(1, 2)) == 3 -@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True) +@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_ai_function_invoke_telemetry_enabled(otel_settings): """Test the ai_function invoke method with telemetry enabled.""" @ai_function( name="telemetry_test_tool", description="A test tool for telemetry", - additional_properties={"otel_settings": otel_settings}, ) def telemetry_test_tool(x: int, y: int) -> int: """A function that adds two numbers for telemetry testing.""" @@ -99,7 +100,8 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings): # Mock the tracer and span with ( patch("agent_framework.telemetry.tracer"), - patch("agent_framework._tools.get_function_span") as mock_start_span, + # the span creation uses a form of deepcopy, so need to mock that way + patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span, ): mock_span = Mock() mock_context_manager = Mock() @@ -118,7 +120,17 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings): assert result == 3 # Verify telemetry calls - mock_start_span.assert_called_once_with(function=telemetry_test_tool, tool_call_id="test_call_id") + mock_start_span.assert_called_once_with( + attributes={ + OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, + OtelAttr.TOOL_NAME: "telemetry_test_tool", + OtelAttr.TOOL_CALL_ID: "test_call_id", + OtelAttr.TOOL_TYPE: "function", + OtelAttr.TOOL_DESCRIPTION: "A test tool for telemetry", + OtelAttr.TOOL_ARGUMENTS: '{"x": 1, "y": 2}', + } + ) + assert mock_span.set_attribute.call_count == 2 # Verify histogram was called with correct attributes mock_histogram.record.assert_called_once() @@ -129,14 +141,13 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings): assert attributes[OtelAttr.TOOL_CALL_ID] == "test_call_id" -@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True) +@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings): """Test the ai_function invoke method with Pydantic model arguments.""" @ai_function( name="pydantic_test_tool", description="A test tool with Pydantic args", - additional_properties={"otel_settings": otel_settings}, ) def pydantic_test_tool(x: int, y: int) -> int: """A function that adds two numbers using Pydantic args.""" @@ -147,7 +158,8 @@ async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings): with ( patch("agent_framework.telemetry.tracer"), - patch("agent_framework._tools.get_function_span") as mock_start_span, + # the span creation uses a form of deepcopy, so need to mock that way + patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span, ): mock_span = Mock() mock_context_manager = Mock() @@ -166,9 +178,16 @@ async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings): # Verify telemetry calls mock_start_span.assert_called_once_with( - function=pydantic_test_tool, - tool_call_id="pydantic_call", + attributes={ + OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, + OtelAttr.TOOL_NAME: "pydantic_test_tool", + OtelAttr.TOOL_CALL_ID: "pydantic_call", + OtelAttr.TOOL_TYPE: "function", + OtelAttr.TOOL_DESCRIPTION: "A test tool with Pydantic args", + OtelAttr.TOOL_ARGUMENTS: '{"x":5,"y":10}', + } ) + assert mock_span.set_attribute.call_count == 2 @pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True) @@ -178,7 +197,6 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings): @ai_function( name="exception_test_tool", description="A test tool that raises an exception", - additional_properties={"otel_settings": otel_settings}, ) def exception_test_tool(x: int, y: int) -> int: """A function that raises an exception for telemetry testing.""" @@ -186,7 +204,8 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings): with ( patch("agent_framework.telemetry.tracer"), - patch("agent_framework._tools.get_function_span") as mock_start_span, + # the span creation uses a form of deepcopy, so need to mock that way + patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span, ): mock_span = Mock() mock_context_manager = Mock() @@ -216,14 +235,13 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings): assert attributes[OtelAttr.ERROR_TYPE] == ValueError.__name__ -@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True) +@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_ai_function_invoke_telemetry_async_function(otel_settings): """Test the ai_function invoke method with telemetry on async function.""" @ai_function( name="async_telemetry_test", description="An async test tool for telemetry", - additional_properties={"otel_settings": otel_settings}, ) async def async_telemetry_test(x: int, y: int) -> int: """An async function for telemetry testing.""" @@ -231,7 +249,8 @@ async def test_ai_function_invoke_telemetry_async_function(otel_settings): with ( patch("agent_framework.telemetry.tracer"), - patch("agent_framework._tools.get_function_span") as mock_start_span, + # the span creation uses a form of deepcopy, so need to mock that way + patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span, ): mock_span = Mock() mock_context_manager = Mock() @@ -249,7 +268,17 @@ async def test_ai_function_invoke_telemetry_async_function(otel_settings): assert result == 12 # Verify telemetry calls - mock_start_span.assert_called_once_with(function=async_telemetry_test, tool_call_id="async_call") + mock_start_span.assert_called_once_with( + attributes={ + OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION, + OtelAttr.TOOL_NAME: "async_telemetry_test", + OtelAttr.TOOL_CALL_ID: "async_call", + OtelAttr.TOOL_TYPE: "function", + OtelAttr.TOOL_DESCRIPTION: "An async test tool for telemetry", + OtelAttr.TOOL_ARGUMENTS: '{"x": 3, "y": 4}', + } + ) + assert mock_span.set_attribute.call_count == 2 # Verify histogram recording mock_histogram.record.assert_called_once() diff --git a/python/packages/main/tests/main/utils.py b/python/packages/main/tests/main/utils.py new file mode 100644 index 0000000000..61bf277d94 --- /dev/null +++ b/python/packages/main/tests/main/utils.py @@ -0,0 +1,12 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from copy import deepcopy +from unittest.mock import MagicMock + + +class CopyingMock(MagicMock): + def __call__(self, *args, **kwargs): + args = deepcopy(args) + kwargs = deepcopy(kwargs) + return super().__call__(*args, **kwargs) diff --git a/python/samples/getting_started/telemetry/02a-generic_chat_client.py b/python/samples/getting_started/telemetry/02a-generic_chat_client.py index 6fb85a440a..e5e709039a 100644 --- a/python/samples/getting_started/telemetry/02a-generic_chat_client.py +++ b/python/samples/getting_started/telemetry/02a-generic_chat_client.py @@ -108,15 +108,15 @@ async def main(scenario: Literal["chat_client", "chat_client_stream", "ai_functi client = OpenAIResponsesClient() # Scenarios where telemetry is collected in the SDK, from the most basic to the most complex. + if scenario == "ai_function" or scenario == "all": + with suppress(Exception): + await run_ai_function() if scenario == "chat_client_stream" or scenario == "all": with suppress(Exception): await run_chat_client(client, stream=True) if scenario == "chat_client" or scenario == "all": with suppress(Exception): await run_chat_client(client, stream=False) - if scenario == "ai_function" or scenario == "all": - with suppress(Exception): - await run_ai_function() if __name__ == "__main__":