From c8b8198af10ffcc91070eeb692174e047b35ad70 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 12:59:06 -0700 Subject: [PATCH] Python: Prevent duplicate system instructions in Python telemetry (#5981) * Initial plan * Fix duplicated system instructions in Python telemetry * Clarify telemetry message filtering * test: cover separate and in-history system messages * Clarify observability message logging split * Simplify observability logging serialization * Harden observability regression test * Reuse observability span message serialization * Clarify observability logging loops * Polish observability message serialization * Tighten observability zip checks * Refactor observability message capture loop * Fix telemetry logging for separate system instructions * Refine observability OTEL message typing * Restore prepended-instruction logging path in _capture_messages * Revert logging change in _capture_messages; keep chat-history-only logging --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- .../core/agent_framework/observability.py | 8 +- .../core/tests/core/test_observability.py | 147 +++++++++++++++++- 2 files changed, 149 insertions(+), 6 deletions(-) diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py index 022008c05b..362be2146e 100644 --- a/python/packages/core/agent_framework/observability.py +++ b/python/packages/core/agent_framework/observability.py @@ -2153,15 +2153,14 @@ def _capture_messages( finish_reason: FinishReason | None = None, ) -> None: """Log messages with extra information.""" - from ._types import normalize_messages, prepend_instructions_to_messages + from ._types import normalize_messages - prepped = prepend_instructions_to_messages(normalize_messages(messages), system_instructions) + normalized_messages = normalize_messages(messages) otel_messages: list[dict[str, Any]] = [] - for index, message in enumerate(prepped): + for index, message in enumerate(normalized_messages): # Reuse the otel message representation for logging instead of calling to_dict() # to avoid expensive Pydantic serialization overhead otel_message = _to_otel_message(message) - otel_messages.append(otel_message) logger.info( otel_message, extra={ @@ -2170,6 +2169,7 @@ def _capture_messages( MessageListTimestampFilter.INDEX_KEY: index, }, ) + otel_messages.append(otel_message) if finish_reason: otel_messages[-1]["finish_reason"] = FINISH_REASON_MAP[finish_reason] span.set_attribute( diff --git a/python/packages/core/tests/core/test_observability.py b/python/packages/core/tests/core/test_observability.py index 6185bccd44..d4403043af 100644 --- a/python/packages/core/tests/core/test_observability.py +++ b/python/packages/core/tests/core/test_observability.py @@ -290,9 +290,9 @@ async def test_chat_client_observability_with_instructions( assert len(system_instructions) == 1 assert system_instructions[0]["content"] == "You are a helpful assistant." - # Verify input_messages contains system message + # Verify input_messages excludes system instructions input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) - assert any(msg.get("role") == "system" for msg in input_messages) + assert [msg.get("role") for msg in input_messages] == ["user"] @pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) @@ -324,6 +324,40 @@ async def test_chat_client_streaming_observability_with_instructions( assert len(system_instructions) == 1 assert system_instructions[0]["content"] == "You are a helpful assistant." + input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["user"] + + +@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) +async def test_chat_client_observability_with_system_message_and_instructions( + mock_chat_client, span_exporter: InMemorySpanExporter, enable_sensitive_data +): + """Test input chat-history system messages stay in input_messages when instructions are separate.""" + import json + + client = mock_chat_client() + + messages = [ + Message(role="system", contents=["Original system message"]), + Message(role="user", contents=["Test message"]), + ] + options = {"model": "Test", "instructions": "Framework system instruction"} + span_exporter.clear() + response = await client.get_response(messages=messages, options=options) + + assert response is not None + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + + system_instructions = json.loads(span.attributes[OtelAttr.SYSTEM_INSTRUCTIONS]) + assert system_instructions == [{"type": "text", "content": "Framework system instruction"}] + + input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["system", "user"] + assert input_messages[0]["parts"][0]["content"] == "Original system message" + assert input_messages[1]["parts"][0]["content"] == "Test message" + @pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_chat_client_observability_without_instructions( @@ -2981,6 +3015,81 @@ async def test_system_instructions_preserves_non_ascii_characters(span_exporter: system_instructions = json.loads(system_instructions_json) assert system_instructions[0]["content"] == chinese_text + input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["user"] + + +def test_capture_messages_keeps_framework_instructions_out_of_logs_and_span_messages( + span_exporter: InMemorySpanExporter, +): + """Test separate framework instructions do not appear in chat-history logs or span messages.""" + import json + + from opentelemetry import trace + + tracer = trace.get_tracer("test") + span_exporter.clear() + + with ( + patch("agent_framework.observability.logger.info") as mock_logger_info, + tracer.start_as_current_span("test_span") as span, + ): + _capture_messages( + span=span, + provider_name="test_provider", + messages=[Message(role="user", contents=["Test"])], + system_instructions="Framework system instruction", + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + input_messages = json.loads(spans[0].attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["user"] + + assert mock_logger_info.call_count == 1, f"Expected 1 log call, got {mock_logger_info.call_count}" + (first_call,) = mock_logger_info.call_args_list + assert first_call.args + logged_message = first_call.args[0] + assert logged_message["role"] == "user" + assert logged_message["parts"][0]["content"] == "Test" + + +def test_capture_messages_logs_only_chat_history_when_framework_instructions_are_separate( + span_exporter: InMemorySpanExporter, +): + """Test chat-history logging preserves original system messages without prepending framework instructions.""" + import json + + from opentelemetry import trace + + tracer = trace.get_tracer("test") + span_exporter.clear() + + with ( + patch("agent_framework.observability.logger.info") as mock_logger_info, + tracer.start_as_current_span("test_span") as span, + ): + _capture_messages( + span=span, + provider_name="test_provider", + messages=[ + Message(role="system", contents=["Original system message"]), + Message(role="user", contents=["Test"]), + ], + system_instructions="Framework system instruction", + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + input_messages = json.loads(spans[0].attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["system", "user"] + + assert mock_logger_info.call_count == 2, f"Expected 2 log calls, got {mock_logger_info.call_count}" + logged_messages = [call.args[0] for call in mock_logger_info.call_args_list] + assert [msg["role"] for msg in logged_messages] == ["system", "user"] + assert logged_messages[0]["parts"][0]["content"] == "Original system message" + assert logged_messages[1]["parts"][0]["content"] == "Test" + @pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_tool_arguments_preserves_non_ascii_characters(span_exporter: InMemorySpanExporter): @@ -3104,6 +3213,40 @@ async def test_agent_instructions_from_default_options( assert len(system_instructions) == 1 assert system_instructions[0]["content"] == "Default system instructions." + input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["user"] + + +@pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) +async def test_agent_instructions_preserve_system_messages_in_history( + mock_chat_agent, span_exporter: InMemorySpanExporter, enable_sensitive_data +): + """Test agent spans keep chat-history system messages separate from framework instructions.""" + import json + + agent = mock_chat_agent() + agent.default_options = {"model": "TestModel", "instructions": "Default system instructions."} + + messages = [ + Message(role="system", contents=["Original system message"]), + Message(role="user", contents=["Test message"]), + ] + span_exporter.clear() + response = await agent.run(messages) + + assert response is not None + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + + system_instructions = json.loads(span.attributes[OtelAttr.SYSTEM_INSTRUCTIONS]) + assert system_instructions == [{"type": "text", "content": "Default system instructions."}] + + input_messages = json.loads(span.attributes[OtelAttr.INPUT_MESSAGES]) + assert [msg.get("role") for msg in input_messages] == ["system", "user"] + assert input_messages[0]["parts"][0]["content"] == "Original system message" + assert input_messages[1]["parts"][0]["content"] == "Test message" + @pytest.mark.parametrize("enable_sensitive_data", [True], indirect=True) async def test_agent_instructions_from_options_override(