Python: added args and results to function call span (#733)

* added args and results to function call span

* add ignores for mypy

* improved serialization

* also add None result

* improved handling

* log args when None

* slight tweak

* fix tests
This commit is contained in:
Eduard van Valkenburg
2025-09-15 20:28:01 +02:00
committed by GitHub
Unverified
parent ba9a1a61e1
commit db58a10a37
6 changed files with 123 additions and 80 deletions
+26 -14
View File
@@ -2,6 +2,7 @@
import asyncio
import inspect
import json
import sys
from collections.abc import AsyncIterable, Awaitable, Callable, Collection, MutableMapping, Sequence
from functools import wraps
@@ -31,6 +32,7 @@ from .telemetry import (
OtelAttr,
_capture_exception, # type: ignore
get_function_span,
get_function_span_attributes,
meter,
)
@@ -407,7 +409,7 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]):
if not isinstance(arguments, self.input_model):
raise TypeError(f"Expected {self.input_model.__name__}, got {type(arguments).__name__}")
kwargs = arguments.model_dump(exclude_none=True)
if not OTEL_SETTINGS.ENABLED: # type: ignore
if not OTEL_SETTINGS.ENABLED: # type: ignore[name-defined]
logger.info(f"Function name: {self.name}")
logger.debug(f"Function arguments: {kwargs}")
res = self.__call__(**kwargs)
@@ -417,16 +419,19 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]):
return result # type: ignore[reportReturnType]
setup_telemetry()
with get_function_span(
function=self,
tool_call_id=tool_call_id,
) as span:
hist_attributes: dict[str, Any] = {
OtelAttr.MEASUREMENT_FUNCTION_TAG_NAME: self.name,
OtelAttr.TOOL_CALL_ID: tool_call_id or "unknown",
}
attributes = get_function_span_attributes(self, tool_call_id=tool_call_id)
if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined]
attributes.update({
OtelAttr.TOOL_ARGUMENTS: arguments.model_dump_json()
if arguments
else json.dumps(kwargs)
if kwargs
else "None"
})
with get_function_span(attributes=attributes) as span:
attributes[OtelAttr.MEASUREMENT_FUNCTION_TAG_NAME] = self.name
logger.info(f"Function name: {self.name}")
if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore
if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined]
logger.debug(f"Function arguments: {kwargs}")
start_time_stamp = perf_counter()
end_time_stamp: float | None = None
@@ -436,19 +441,26 @@ class AIFunction(BaseTool, Generic[ArgsT, ReturnT]):
end_time_stamp = perf_counter()
except Exception as exception:
end_time_stamp = perf_counter()
hist_attributes[OtelAttr.ERROR_TYPE] = type(exception).__name__
attributes[OtelAttr.ERROR_TYPE] = type(exception).__name__
_capture_exception(span=span, exception=exception, timestamp=time_ns())
logger.error(f"Function failed. Error: {exception}")
raise
else:
logger.info(f"Function {self.name} succeeded.")
if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore
logger.debug(f"Function result: {result or 'None'}")
if OTEL_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined]
try:
json_result = json.dumps(result)
except (TypeError, OverflowError):
span.set_attribute(OtelAttr.TOOL_RESULT, "<non-serializable result>")
logger.debug("Function result: <non-serializable result>")
else:
span.set_attribute(OtelAttr.TOOL_RESULT, json_result)
logger.debug(f"Function result: {json_result}")
return result # type: ignore[reportReturnType]
finally:
duration = (end_time_stamp or perf_counter()) - start_time_stamp
span.set_attribute(OtelAttr.MEASUREMENT_FUNCTION_INVOCATION_DURATION, duration)
self._invocation_duration_histogram.record(duration, attributes=hist_attributes)
self._invocation_duration_histogram.record(duration, attributes=attributes)
logger.info("Function duration: %fs", duration)
def parameters(self) -> dict[str, Any]:
@@ -203,6 +203,8 @@ class OtelAttr(str, Enum):
TOOL_DESCRIPTION = "gen_ai.tool.description"
TOOL_NAME = "gen_ai.tool.name"
TOOL_TYPE = "gen_ai.tool.type"
TOOL_ARGUMENTS = "gen_ai.tool.call.arguments"
TOOL_RESULT = "gen_ai.tool.call.result"
# Agent attributes
AGENT_ID = "gen_ai.agent.id"
# Client attributes
@@ -885,18 +887,15 @@ def use_agent_telemetry(
# region Otel Helpers
def get_function_span(
function: "AIFunction[Any, Any]",
tool_call_id: str | None = None,
) -> "_AgnosticContextManager[Span]":
"""Starts a span for the given function.
def get_function_span_attributes(function: "AIFunction[Any, Any]", tool_call_id: str | None = None) -> dict[str, str]:
"""Get the span attributes for the given function.
Args:
function: The function for which to start the span.
function: The function for which to get the span attributes.
tool_call_id: The id of the tool_call that was requested.
Returns:
trace.Span: The started span as a context manager.
dict[str, str]: The span attributes.
"""
attributes: dict[str, str] = {
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
@@ -906,9 +905,22 @@ def get_function_span(
}
if function.description:
attributes[OtelAttr.TOOL_DESCRIPTION] = function.description
return attributes
def get_function_span(
attributes: dict[str, str],
) -> "_AgnosticContextManager[Span]":
"""Starts a span for the given function.
Args:
attributes: The span attributes.
Returns:
trace.Span: The started span as a context manager.
"""
return tracer.start_as_current_span(
name=f"{OtelAttr.TOOL_EXECUTION_OPERATION} {function.name}",
name=f"{attributes[OtelAttr.OPERATION]} {attributes[OtelAttr.TOOL_NAME]}",
attributes=attributes,
set_status_on_exception=False,
end_on_exit=True,
@@ -37,6 +37,8 @@ from agent_framework.telemetry import (
use_telemetry,
)
from .utils import CopyingMock
# region Test constants
@@ -198,8 +200,14 @@ def test_start_span_basic():
mock_function = Mock()
mock_function.name = "test_function"
mock_function.description = "Test function description"
attributes = {
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
OtelAttr.TOOL_NAME: "test_function",
OtelAttr.TOOL_DESCRIPTION: "Test function description",
OtelAttr.TOOL_TYPE: "function",
}
result = get_function_span(mock_function)
result = get_function_span(attributes)
assert result == mock_span
mock_tracer.start_as_current_span.assert_called_once()
@@ -217,7 +225,7 @@ def test_start_span_with_tool_call_id():
"""Test starting a span with tool_call_id."""
mock_tracer = Mock()
with patch("agent_framework.telemetry.tracer", mock_tracer):
mock_span = Mock()
mock_span = CopyingMock()
mock_tracer.start_as_current_span.return_value = mock_span
mock_function = Mock()
@@ -225,32 +233,21 @@ def test_start_span_with_tool_call_id():
mock_function.description = "Test function"
tool_call_id = "test_call_123"
attributes = {
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
OtelAttr.TOOL_NAME: "test_function",
OtelAttr.TOOL_DESCRIPTION: "Test function",
OtelAttr.TOOL_TYPE: "function",
OtelAttr.TOOL_CALL_ID: tool_call_id,
}
_ = get_function_span(mock_function, tool_call_id)
_ = get_function_span(attributes)
call_args = mock_tracer.start_as_current_span.call_args
attributes = call_args[1]["attributes"]
assert attributes[OtelAttr.TOOL_CALL_ID] == "test_call_123"
def test_start_span_without_description():
"""Test starting a span when function has no description."""
mock_tracer = Mock()
with patch("agent_framework.telemetry.tracer", mock_tracer):
mock_span = Mock()
mock_tracer.start_as_current_span.return_value = mock_span
mock_function = Mock()
mock_function.name = "test_function"
mock_function.description = None
get_function_span(mock_function)
call_args = mock_tracer.start_as_current_span.call_args
attributes = call_args[1]["attributes"]
assert OtelAttr.TOOL_DESCRIPTION not in attributes
# region Test use_telemetry decorator
@@ -377,25 +374,6 @@ async def test_streaming_response_with_otel(mock_chat_client, otel_settings):
mock_set_output.assert_called_once()
def test_start_as_current_span_with_none_metadata():
"""Test get_function_span with None metadata."""
mock_tracer = Mock()
with patch("agent_framework.telemetry.tracer", mock_tracer):
mock_span = Mock()
mock_tracer.start_as_current_span.return_value = mock_span
mock_function = Mock()
mock_function.name = "test_function"
mock_function.description = "Test description"
result = get_function_span(mock_function, None)
assert result == mock_span
call_args = mock_tracer.start_as_current_span.call_args
attributes = call_args[1]["attributes"]
assert attributes[OtelAttr.TOOL_CALL_ID] == "unknown"
def test_prepend_user_agent_with_none_value():
"""Test prepend user agent with None value in headers."""
headers = {"User-Agent": None}
+44 -15
View File
@@ -17,6 +17,8 @@ from agent_framework._tools import _parse_inputs
from agent_framework.exceptions import ToolException
from agent_framework.telemetry import OtelAttr
from .utils import CopyingMock
# region AIFunction and ai_function decorator tests
@@ -83,14 +85,13 @@ async def test_ai_function_decorator_with_async():
assert (await async_test_tool(1, 2)) == 3
@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True)
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_ai_function_invoke_telemetry_enabled(otel_settings):
"""Test the ai_function invoke method with telemetry enabled."""
@ai_function(
name="telemetry_test_tool",
description="A test tool for telemetry",
additional_properties={"otel_settings": otel_settings},
)
def telemetry_test_tool(x: int, y: int) -> int:
"""A function that adds two numbers for telemetry testing."""
@@ -99,7 +100,8 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings):
# Mock the tracer and span
with (
patch("agent_framework.telemetry.tracer"),
patch("agent_framework._tools.get_function_span") as mock_start_span,
# the span creation uses a form of deepcopy, so need to mock that way
patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span,
):
mock_span = Mock()
mock_context_manager = Mock()
@@ -118,7 +120,17 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings):
assert result == 3
# Verify telemetry calls
mock_start_span.assert_called_once_with(function=telemetry_test_tool, tool_call_id="test_call_id")
mock_start_span.assert_called_once_with(
attributes={
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
OtelAttr.TOOL_NAME: "telemetry_test_tool",
OtelAttr.TOOL_CALL_ID: "test_call_id",
OtelAttr.TOOL_TYPE: "function",
OtelAttr.TOOL_DESCRIPTION: "A test tool for telemetry",
OtelAttr.TOOL_ARGUMENTS: '{"x": 1, "y": 2}',
}
)
assert mock_span.set_attribute.call_count == 2
# Verify histogram was called with correct attributes
mock_histogram.record.assert_called_once()
@@ -129,14 +141,13 @@ async def test_ai_function_invoke_telemetry_enabled(otel_settings):
assert attributes[OtelAttr.TOOL_CALL_ID] == "test_call_id"
@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True)
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings):
"""Test the ai_function invoke method with Pydantic model arguments."""
@ai_function(
name="pydantic_test_tool",
description="A test tool with Pydantic args",
additional_properties={"otel_settings": otel_settings},
)
def pydantic_test_tool(x: int, y: int) -> int:
"""A function that adds two numbers using Pydantic args."""
@@ -147,7 +158,8 @@ async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings):
with (
patch("agent_framework.telemetry.tracer"),
patch("agent_framework._tools.get_function_span") as mock_start_span,
# the span creation uses a form of deepcopy, so need to mock that way
patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span,
):
mock_span = Mock()
mock_context_manager = Mock()
@@ -166,9 +178,16 @@ async def test_ai_function_invoke_telemetry_with_pydantic_args(otel_settings):
# Verify telemetry calls
mock_start_span.assert_called_once_with(
function=pydantic_test_tool,
tool_call_id="pydantic_call",
attributes={
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
OtelAttr.TOOL_NAME: "pydantic_test_tool",
OtelAttr.TOOL_CALL_ID: "pydantic_call",
OtelAttr.TOOL_TYPE: "function",
OtelAttr.TOOL_DESCRIPTION: "A test tool with Pydantic args",
OtelAttr.TOOL_ARGUMENTS: '{"x":5,"y":10}',
}
)
assert mock_span.set_attribute.call_count == 2
@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True)
@@ -178,7 +197,6 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings):
@ai_function(
name="exception_test_tool",
description="A test tool that raises an exception",
additional_properties={"otel_settings": otel_settings},
)
def exception_test_tool(x: int, y: int) -> int:
"""A function that raises an exception for telemetry testing."""
@@ -186,7 +204,8 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings):
with (
patch("agent_framework.telemetry.tracer"),
patch("agent_framework._tools.get_function_span") as mock_start_span,
# the span creation uses a form of deepcopy, so need to mock that way
patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span,
):
mock_span = Mock()
mock_context_manager = Mock()
@@ -216,14 +235,13 @@ async def test_ai_function_invoke_telemetry_with_exception(otel_settings):
assert attributes[OtelAttr.ERROR_TYPE] == ValueError.__name__
@pytest.mark.parametrize("otel_settings", [(True, True)], indirect=True)
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_ai_function_invoke_telemetry_async_function(otel_settings):
"""Test the ai_function invoke method with telemetry on async function."""
@ai_function(
name="async_telemetry_test",
description="An async test tool for telemetry",
additional_properties={"otel_settings": otel_settings},
)
async def async_telemetry_test(x: int, y: int) -> int:
"""An async function for telemetry testing."""
@@ -231,7 +249,8 @@ async def test_ai_function_invoke_telemetry_async_function(otel_settings):
with (
patch("agent_framework.telemetry.tracer"),
patch("agent_framework._tools.get_function_span") as mock_start_span,
# the span creation uses a form of deepcopy, so need to mock that way
patch("agent_framework._tools.get_function_span", new_callable=CopyingMock) as mock_start_span,
):
mock_span = Mock()
mock_context_manager = Mock()
@@ -249,7 +268,17 @@ async def test_ai_function_invoke_telemetry_async_function(otel_settings):
assert result == 12
# Verify telemetry calls
mock_start_span.assert_called_once_with(function=async_telemetry_test, tool_call_id="async_call")
mock_start_span.assert_called_once_with(
attributes={
OtelAttr.OPERATION: OtelAttr.TOOL_EXECUTION_OPERATION,
OtelAttr.TOOL_NAME: "async_telemetry_test",
OtelAttr.TOOL_CALL_ID: "async_call",
OtelAttr.TOOL_TYPE: "function",
OtelAttr.TOOL_DESCRIPTION: "An async test tool for telemetry",
OtelAttr.TOOL_ARGUMENTS: '{"x": 3, "y": 4}',
}
)
assert mock_span.set_attribute.call_count == 2
# Verify histogram recording
mock_histogram.record.assert_called_once()
+12
View File
@@ -0,0 +1,12 @@
# Copyright (c) Microsoft. All rights reserved.
from copy import deepcopy
from unittest.mock import MagicMock
class CopyingMock(MagicMock):
def __call__(self, *args, **kwargs):
args = deepcopy(args)
kwargs = deepcopy(kwargs)
return super().__call__(*args, **kwargs)
@@ -108,15 +108,15 @@ async def main(scenario: Literal["chat_client", "chat_client_stream", "ai_functi
client = OpenAIResponsesClient()
# Scenarios where telemetry is collected in the SDK, from the most basic to the most complex.
if scenario == "ai_function" or scenario == "all":
with suppress(Exception):
await run_ai_function()
if scenario == "chat_client_stream" or scenario == "all":
with suppress(Exception):
await run_chat_client(client, stream=True)
if scenario == "chat_client" or scenario == "all":
with suppress(Exception):
await run_chat_client(client, stream=False)
if scenario == "ai_function" or scenario == "all":
with suppress(Exception):
await run_ai_function()
if __name__ == "__main__":