Python: Fix non-ascii chars in span attributes (#3894)

* Fix non-ascii chars in span attributes

* Comments
This commit is contained in:
Tao Chen
2026-02-12 14:53:32 -08:00
committed by GitHub
Unverified
parent a276c1295a
commit 1441fd903c
3 changed files with 181 additions and 5 deletions
@@ -475,9 +475,9 @@ class FunctionTool(SerializationMixin, Generic[ArgsT]):
}
}
attributes.update({
OtelAttr.TOOL_ARGUMENTS: arguments.model_dump_json()
OtelAttr.TOOL_ARGUMENTS: arguments.model_dump_json(ensure_ascii=False)
if arguments
else json.dumps(serializable_kwargs, default=str)
else json.dumps(serializable_kwargs, default=str, ensure_ascii=False)
if serializable_kwargs
else "None"
})
@@ -1557,7 +1557,7 @@ OTEL_ATTR_MAP: dict[str | tuple[str, ...], tuple[str, Callable[[Any], Any] | Non
"tools": (
OtelAttr.TOOL_DEFINITIONS,
lambda tools: (
json.dumps(tools_dict)
json.dumps(tools_dict, ensure_ascii=False)
if (tools_dict := __import__("agent_framework._tools", fromlist=["_tools_to_dict"])._tools_to_dict(tools))
else None
),
@@ -1639,12 +1639,14 @@ def _capture_messages(
)
if finish_reason:
otel_messages[-1]["finish_reason"] = FINISH_REASON_MAP[finish_reason]
span.set_attribute(OtelAttr.OUTPUT_MESSAGES if output else OtelAttr.INPUT_MESSAGES, json.dumps(otel_messages))
span.set_attribute(
OtelAttr.OUTPUT_MESSAGES if output else OtelAttr.INPUT_MESSAGES, json.dumps(otel_messages, ensure_ascii=False)
)
if system_instructions:
if not isinstance(system_instructions, list):
system_instructions = [system_instructions]
otel_sys_instructions = [{"type": "text", "content": instruction} for instruction in system_instructions]
span.set_attribute(OtelAttr.SYSTEM_INSTRUCTIONS, json.dumps(otel_sys_instructions))
span.set_attribute(OtelAttr.SYSTEM_INSTRUCTIONS, json.dumps(otel_sys_instructions, ensure_ascii=False))
def _to_otel_message(message: Message) -> dict[str, Any]:
@@ -30,6 +30,7 @@ from agent_framework.observability import (
ChatTelemetryLayer,
MessageListTimestampFilter,
OtelAttr,
_capture_messages,
get_function_span,
)
@@ -2263,3 +2264,176 @@ async def test_layer_ordering_span_sequence_with_function_calling(span_exporter:
# Third span: second chat (LLM call with function result)
assert sorted_spans[2].name.startswith("chat"), f"Third span should be 'chat', got '{sorted_spans[2].name}'"
# region Test non-ASCII character handling in JSON serialization
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_capture_messages_preserves_non_ascii_characters(
    mock_chat_client,
    span_exporter: InMemorySpanExporter,
):
    """Check that Japanese text reaches the input/output message span attributes unescaped.

    A chat client subclass answers with the same Japanese string that is sent
    as the user message; the single exported span must carry that string
    verbatim in both message attributes, and both attributes must parse as JSON.
    """
    import json

    japanese_text = "こんにちは世界"  # "Hello World" in Japanese

    class ClientWithJapanese(mock_chat_client):
        async def _inner_get_response(self, *, messages, options, **kwargs):
            return ChatResponse(
                messages=[Message(role="assistant", text=japanese_text)],
                usage_details=UsageDetails(input_token_count=5, output_token_count=10),
            )

    chat_client = ClientWithJapanese()
    user_messages = [Message(role="user", text=japanese_text)]

    # Start from an empty exporter so only the span from this call is counted.
    span_exporter.clear()
    reply = await chat_client.get_response(messages=user_messages, model_id="Test")
    assert reply is not None

    exported = span_exporter.get_finished_spans()
    assert len(exported) == 1
    chat_span = exported[0]

    # Raw-string checks, input first and then output: the text must appear
    # verbatim and must not have been turned into \uXXXX escape sequences.
    raw_payloads = []
    for attr_key in (OtelAttr.INPUT_MESSAGES, OtelAttr.OUTPUT_MESSAGES):
        payload = chat_span.attributes[attr_key]
        assert japanese_text in payload
        assert "\\u" not in payload
        raw_payloads.append(payload)

    # Each attribute must also be valid JSON whose first part holds the text.
    for payload in raw_payloads:
        decoded = json.loads(payload)
        assert decoded[0]["parts"][0]["content"] == japanese_text
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_system_instructions_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Check that Chinese system instructions are stored unescaped on the span.

    Calls ``_capture_messages`` directly inside a manually started span and
    inspects the system-instructions attribute of the exported span.
    """
    import json

    from opentelemetry import trace

    chinese_text = "你好世界"  # "Hello World" in Chinese

    test_tracer = trace.get_tracer("test")
    span_exporter.clear()

    with test_tracer.start_as_current_span("test_span") as live_span:
        _capture_messages(
            span=live_span,
            provider_name="test_provider",
            messages=[Message(role="user", text="Test")],
            system_instructions=chinese_text,
        )

    # The span is only available from the exporter once the ``with`` block has ended.
    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    recorded_span = finished[0]

    # The raw attribute must contain the Chinese text with no \uXXXX escapes.
    raw_instructions = recorded_span.attributes[OtelAttr.SYSTEM_INSTRUCTIONS]
    assert chinese_text in raw_instructions
    assert "\\u" not in raw_instructions

    # It must also be valid JSON whose first entry carries the text.
    decoded_instructions = json.loads(raw_instructions)
    assert decoded_instructions[0]["content"] == chinese_text
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_arguments_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Check that Korean keyword arguments are stored unescaped in the tool-arguments attribute."""
    import json

    korean_text = "안녕하세요"  # "Hello" in Korean

    @tool
    def greet(message: str) -> str:
        """Greet with a message."""
        return f"Greeted: {message}"

    span_exporter.clear()
    await greet.invoke(message=korean_text)

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    tool_span = finished[0]

    # The raw attribute must contain the Korean text with no \uXXXX escapes.
    raw_arguments = tool_span.attributes[OtelAttr.TOOL_ARGUMENTS]
    assert korean_text in raw_arguments
    assert "\\u" not in raw_arguments

    # It must also be valid JSON mapping the parameter name to the text.
    decoded_arguments = json.loads(raw_arguments)
    assert decoded_arguments["message"] == korean_text
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_result_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter):
    """Check that an Arabic tool return value appears verbatim in the tool-result attribute."""
    arabic_text = "مرحبا بالعالم"  # "Hello World" in Arabic

    @tool
    def echo(text: str) -> str:
        """Echo the text back."""
        return text

    span_exporter.clear()
    echoed = await echo.invoke(text=arabic_text)
    assert echoed == arabic_text

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    tool_span = finished[0]

    # The recorded result must contain the Arabic text unchanged.
    recorded_result = tool_span.attributes[OtelAttr.TOOL_RESULT]
    assert arabic_text in recorded_result
@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True)
async def test_tool_arguments_pydantic_preserves_non_ascii_characters(
    span_exporter: InMemorySpanExporter,
) -> None:
    """Check that Japanese text nested in a Pydantic argument is stored unescaped.

    Unlike the keyword-argument variant, the tool here is invoked with an
    instance of its own ``input_model`` passed through ``arguments=``.
    """
    import json

    from pydantic import BaseModel

    japanese_text = "こんにちは"  # "Hello" in Japanese

    class Greeting(BaseModel):
        message: str

    @tool
    def greet_with_model(greeting: Greeting) -> str:
        """Greet with a message contained in a Pydantic model."""
        # When invoked via the tool's input_model, greeting is passed as a dict,
        # so accept both the dict form and the model instance.
        text = greeting["message"] if isinstance(greeting, dict) else greeting.message
        return f"Greeted: {text}"

    span_exporter.clear()

    # Build the arguments with the tool's own input_model so the Pydantic
    # value is passed the way the tool expects.
    arguments_model = greet_with_model.input_model
    await greet_with_model.invoke(arguments=arguments_model(greeting=Greeting(message=japanese_text)))

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    tool_span = finished[0]

    # The raw attribute must contain the Japanese text with no \uXXXX escapes.
    raw_arguments = tool_span.attributes[OtelAttr.TOOL_ARGUMENTS]
    assert japanese_text in raw_arguments
    assert "\\u" not in raw_arguments

    # It must also be valid JSON with the text nested under greeting.message.
    decoded_arguments = json.loads(raw_arguments)
    assert decoded_arguments["greeting"]["message"] == japanese_text