mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Fix Eval samples (#4033)
* fix red team sample * Updated self-reflection * fix for workflow eval sample * fix test
This commit is contained in:
committed by
GitHub
Unverified
parent
6a39d5a652
commit
aab80d9ed9
@@ -788,6 +788,9 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
request_input = self._prepare_messages_for_openai(messages)
|
||||
if not request_input:
|
||||
raise ServiceInvalidRequestError("Messages are required for chat completions")
|
||||
|
||||
conversation_id = self._get_current_conversation_id(options, **kwargs)
|
||||
|
||||
run_options["input"] = request_input
|
||||
|
||||
# model id
|
||||
@@ -911,8 +914,11 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
for content in message.contents:
|
||||
match content.type:
|
||||
case "text_reasoning":
|
||||
# Don't send reasoning content back to model
|
||||
continue
|
||||
# Reasoning items must be sent back as top-level input items
|
||||
# for reasoning models that require them alongside function_calls
|
||||
reasoning = self._prepare_content_for_openai(message.role, content, call_id_to_id) # type: ignore[arg-type]
|
||||
if reasoning:
|
||||
all_messages.append(reasoning)
|
||||
case "function_result":
|
||||
new_args: dict[str, Any] = {}
|
||||
new_args.update(self._prepare_content_for_openai(message.role, content, call_id_to_id)) # type: ignore[arg-type]
|
||||
@@ -967,6 +973,8 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
}
|
||||
props: dict[str, Any] | None = getattr(content, "additional_properties", None)
|
||||
if props:
|
||||
if reasoning_id := props.get("reasoning_id"):
|
||||
ret["id"] = reasoning_id
|
||||
if status := props.get("status"):
|
||||
ret["status"] = status
|
||||
if reasoning_text := props.get("reasoning_text"):
|
||||
@@ -1184,22 +1192,29 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
case "reasoning": # ResponseOutputReasoning
|
||||
reasoning_id = getattr(item, "id", None)
|
||||
if hasattr(item, "content") and item.content:
|
||||
for index, reasoning_content in enumerate(item.content):
|
||||
additional_properties = None
|
||||
additional_properties: dict[str, Any] = {}
|
||||
if reasoning_id:
|
||||
additional_properties["reasoning_id"] = reasoning_id
|
||||
if hasattr(item, "summary") and item.summary and index < len(item.summary):
|
||||
additional_properties = {"summary": item.summary[index]}
|
||||
additional_properties["summary"] = item.summary[index]
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=reasoning_content.text,
|
||||
raw_representation=reasoning_content,
|
||||
additional_properties=additional_properties,
|
||||
additional_properties=additional_properties or None,
|
||||
)
|
||||
)
|
||||
if hasattr(item, "summary") and item.summary:
|
||||
for summary in item.summary:
|
||||
contents.append(
|
||||
Content.from_text_reasoning(text=summary.text, raw_representation=summary) # type: ignore[arg-type]
|
||||
Content.from_text_reasoning(
|
||||
text=summary.text,
|
||||
raw_representation=summary, # type: ignore[arg-type]
|
||||
additional_properties={"reasoning_id": reasoning_id} if reasoning_id else None,
|
||||
)
|
||||
)
|
||||
case "code_interpreter_call": # ResponseOutputCodeInterpreterCall
|
||||
call_id = getattr(item, "call_id", None) or getattr(item, "id", None)
|
||||
@@ -1413,16 +1428,40 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
contents.append(Content.from_text(text=event.delta, raw_representation=event))
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_text.delta":
|
||||
contents.append(Content.from_text_reasoning(text=event.delta, raw_representation=event))
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=event.delta,
|
||||
raw_representation=event,
|
||||
additional_properties={"reasoning_id": event.item_id},
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_text.done":
|
||||
contents.append(Content.from_text_reasoning(text=event.text, raw_representation=event))
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
additional_properties={"reasoning_id": event.item_id},
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_summary_text.delta":
|
||||
contents.append(Content.from_text_reasoning(text=event.delta, raw_representation=event))
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=event.delta,
|
||||
raw_representation=event,
|
||||
additional_properties={"reasoning_id": event.item_id},
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.reasoning_summary_text.done":
|
||||
contents.append(Content.from_text_reasoning(text=event.text, raw_representation=event))
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=event.text,
|
||||
raw_representation=event,
|
||||
additional_properties={"reasoning_id": event.item_id},
|
||||
)
|
||||
)
|
||||
metadata.update(self._get_metadata_from_response(event))
|
||||
case "response.code_interpreter_call_code.delta":
|
||||
call_id = getattr(event, "call_id", None) or getattr(event, "id", None) or event.item_id
|
||||
@@ -1593,20 +1632,23 @@ class RawOpenAIResponsesClient( # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
case "reasoning": # ResponseOutputReasoning
|
||||
reasoning_id = getattr(event_item, "id", None)
|
||||
if hasattr(event_item, "content") and event_item.content:
|
||||
for index, reasoning_content in enumerate(event_item.content):
|
||||
additional_properties = None
|
||||
additional_properties: dict[str, Any] = {}
|
||||
if reasoning_id:
|
||||
additional_properties["reasoning_id"] = reasoning_id
|
||||
if (
|
||||
hasattr(event_item, "summary")
|
||||
and event_item.summary
|
||||
and index < len(event_item.summary)
|
||||
):
|
||||
additional_properties = {"summary": event_item.summary[index]}
|
||||
additional_properties["summary"] = event_item.summary[index]
|
||||
contents.append(
|
||||
Content.from_text_reasoning(
|
||||
text=reasoning_content.text,
|
||||
raw_representation=reasoning_content,
|
||||
additional_properties=additional_properties,
|
||||
additional_properties=additional_properties or None,
|
||||
)
|
||||
)
|
||||
case _:
|
||||
|
||||
@@ -129,9 +129,7 @@ def test_azure_assistants_client_init_validation_fail() -> None:
|
||||
def test_azure_assistants_client_init_missing_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
"""Test AzureOpenAIAssistantsClient initialization with missing deployment name."""
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
AzureOpenAIAssistantsClient(
|
||||
api_key=azure_openai_unit_test_env.get("AZURE_OPENAI_API_KEY", "test-key")
|
||||
)
|
||||
AzureOpenAIAssistantsClient(api_key=azure_openai_unit_test_env.get("AZURE_OPENAI_API_KEY", "test-key"))
|
||||
|
||||
|
||||
def test_azure_assistants_client_init_with_default_headers(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
|
||||
@@ -94,15 +94,13 @@ def test_init_endpoint(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]], indirect=True)
|
||||
def test_init_with_empty_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
AzureOpenAIChatClient(
|
||||
)
|
||||
AzureOpenAIChatClient()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_BASE_URL"]], indirect=True)
|
||||
def test_init_with_empty_endpoint_and_base_url(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
AzureOpenAIChatClient(
|
||||
)
|
||||
AzureOpenAIChatClient()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("override_env_param_dict", [{"AZURE_OPENAI_ENDPOINT": "http://test.com"}], indirect=True)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Annotated, Any
|
||||
from unittest.mock import MagicMock
|
||||
@@ -30,6 +31,8 @@ skip_if_azure_integration_tests_disabled = pytest.mark.skipif(
|
||||
else "Integration tests are disabled.",
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OutputStruct(BaseModel):
|
||||
"""A structured output for testing purposes."""
|
||||
@@ -111,8 +114,7 @@ def test_init_with_default_header(azure_openai_unit_test_env: dict[str, str]) ->
|
||||
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME"]], indirect=True)
|
||||
def test_init_with_empty_model_id(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
AzureOpenAIResponsesClient(
|
||||
)
|
||||
AzureOpenAIResponsesClient()
|
||||
|
||||
|
||||
def test_init_with_project_client(azure_openai_unit_test_env: dict[str, str]) -> None:
|
||||
|
||||
@@ -2700,3 +2700,74 @@ async def test_conversation_id_updated_in_options_between_tool_iterations():
|
||||
assert conversation_ids_received[1] == "stream_conv_after_first", (
|
||||
"streaming: conversation_id should be updated in options after receiving new conversation_id from API"
|
||||
)
|
||||
|
||||
|
||||
async def test_streaming_function_calling_response_includes_reasoning_and_tool_results(
|
||||
chat_client_base: SupportsChatGetResponse,
|
||||
):
|
||||
"""Test that the finalized streaming response includes reasoning, function_call,
|
||||
function_result, and final text in its messages.
|
||||
|
||||
This is critical for workflow chaining: when one agent's response is passed as
|
||||
input to the next agent, the conversation must include all items (reasoning,
|
||||
function_call, function_call_output) so the API can validate the history.
|
||||
"""
|
||||
|
||||
@tool(name="search", approval_mode="never_require")
|
||||
def search_func(query: str) -> str:
|
||||
return f"Found results for {query}"
|
||||
|
||||
chat_client_base.streaming_responses = [
|
||||
[
|
||||
# First response: reasoning + function_call
|
||||
ChatResponseUpdate(
|
||||
contents=[
|
||||
Content.from_text_reasoning(
|
||||
text="Let me search for that",
|
||||
additional_properties={"reasoning_id": "rs_test123", "status": "completed"},
|
||||
)
|
||||
],
|
||||
role="assistant",
|
||||
),
|
||||
ChatResponseUpdate(
|
||||
contents=[
|
||||
Content.from_function_call(
|
||||
call_id="call_1",
|
||||
name="search",
|
||||
arguments='{"query": "test"}',
|
||||
additional_properties={"fc_id": "fc_test456"},
|
||||
)
|
||||
],
|
||||
role="assistant",
|
||||
),
|
||||
],
|
||||
[
|
||||
# Second response: final text
|
||||
ChatResponseUpdate(
|
||||
contents=[Content.from_text(text="Here are the results")],
|
||||
role="assistant",
|
||||
),
|
||||
],
|
||||
]
|
||||
|
||||
stream = chat_client_base.get_response(
|
||||
"search for test", options={"tool_choice": "auto", "tools": [search_func]}, stream=True
|
||||
)
|
||||
|
||||
updates = []
|
||||
async for update in stream:
|
||||
updates.append(update)
|
||||
response = await stream.get_final_response()
|
||||
|
||||
# Verify all content types are in the response messages
|
||||
all_content_types = [c.type for msg in response.messages for c in msg.contents]
|
||||
assert "text_reasoning" in all_content_types, "Reasoning must be preserved in response messages"
|
||||
assert "function_call" in all_content_types, "Function call must be preserved in response messages"
|
||||
assert "function_result" in all_content_types, "Function result must be in response messages for chaining"
|
||||
assert "text" in all_content_types, "Final text must be in response messages"
|
||||
|
||||
# Verify reasoning has the reasoning_id preserved
|
||||
reasoning_contents = [c for msg in response.messages for c in msg.contents if c.type == "text_reasoning"]
|
||||
assert len(reasoning_contents) >= 1
|
||||
assert reasoning_contents[0].additional_properties is not None
|
||||
assert reasoning_contents[0].additional_properties.get("reasoning_id") == "rs_test123"
|
||||
|
||||
@@ -154,9 +154,7 @@ def test_init_validation_fail() -> None:
|
||||
def test_init_missing_model_id(openai_unit_test_env: dict[str, str]) -> None:
|
||||
"""Test OpenAIAssistantsClient initialization with missing model ID."""
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
OpenAIAssistantsClient(
|
||||
api_key=openai_unit_test_env.get("OPENAI_API_KEY", "test-key")
|
||||
)
|
||||
OpenAIAssistantsClient(api_key=openai_unit_test_env.get("OPENAI_API_KEY", "test-key"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exclude_list", [["OPENAI_API_KEY"]], indirect=True)
|
||||
|
||||
@@ -97,8 +97,7 @@ def test_init_base_url_from_settings_env() -> None:
|
||||
@pytest.mark.parametrize("exclude_list", [["OPENAI_CHAT_MODEL_ID"]], indirect=True)
|
||||
def test_init_with_empty_model_id(openai_unit_test_env: dict[str, str]) -> None:
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
OpenAIChatClient(
|
||||
)
|
||||
OpenAIChatClient()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exclude_list", [["OPENAI_API_KEY"]], indirect=True)
|
||||
|
||||
@@ -139,8 +139,7 @@ def test_init_with_default_header(openai_unit_test_env: dict[str, str]) -> None:
|
||||
@pytest.mark.parametrize("exclude_list", [["OPENAI_RESPONSES_MODEL_ID"]], indirect=True)
|
||||
def test_init_with_empty_model_id(openai_unit_test_env: dict[str, str]) -> None:
|
||||
with pytest.raises(ServiceInitializationError):
|
||||
OpenAIResponsesClient(
|
||||
)
|
||||
OpenAIResponsesClient()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exclude_list", [["OPENAI_API_KEY"]], indirect=True)
|
||||
@@ -816,7 +815,101 @@ def test_prepare_message_for_openai_with_function_approval_response() -> None:
|
||||
assert prepared_message["approve"] is True
|
||||
|
||||
|
||||
def test_chat_message_with_error_content() -> None:
|
||||
def test_prepare_message_for_openai_includes_reasoning_with_function_call() -> None:
|
||||
"""Test _prepare_message_for_openai includes reasoning items alongside function_calls.
|
||||
|
||||
Reasoning models require reasoning items to be present in the input when
|
||||
function_call items are included. Stripping reasoning causes a 400 error:
|
||||
"function_call was provided without its required reasoning item".
|
||||
"""
|
||||
client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
|
||||
|
||||
reasoning = Content.from_text_reasoning(
|
||||
text="Let me analyze the request",
|
||||
additional_properties={"status": "completed", "reasoning_id": "rs_abc123"},
|
||||
)
|
||||
function_call = Content.from_function_call(
|
||||
call_id="call_123",
|
||||
name="search_hotels",
|
||||
arguments='{"city": "Paris"}',
|
||||
)
|
||||
|
||||
message = Message(role="assistant", contents=[reasoning, function_call])
|
||||
call_id_to_id: dict[str, str] = {}
|
||||
|
||||
result = client._prepare_message_for_openai(message, call_id_to_id)
|
||||
|
||||
# Both reasoning and function_call should be present as top-level items
|
||||
types = [item["type"] for item in result]
|
||||
assert "reasoning" in types, "Reasoning items must be included for reasoning models"
|
||||
assert "function_call" in types
|
||||
|
||||
reasoning_item = next(item for item in result if item["type"] == "reasoning")
|
||||
assert reasoning_item["summary"]["text"] == "Let me analyze the request"
|
||||
assert reasoning_item["id"] == "rs_abc123", "Reasoning id must be preserved for the API"
|
||||
|
||||
|
||||
def test_prepare_messages_for_openai_full_conversation_with_reasoning() -> None:
|
||||
"""Test _prepare_messages_for_openai correctly serializes a full conversation
|
||||
that includes reasoning + function_call + function_result + final text.
|
||||
|
||||
This simulates the conversation history passed between agents in a workflow.
|
||||
The API requires reasoning items alongside function_calls.
|
||||
"""
|
||||
client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
|
||||
|
||||
messages = [
|
||||
Message(role="user", contents=[Content.from_text(text="search for hotels")]),
|
||||
Message(
|
||||
role="assistant",
|
||||
contents=[
|
||||
Content.from_text_reasoning(
|
||||
text="I need to search for hotels",
|
||||
additional_properties={"reasoning_id": "rs_test123", "status": "completed"},
|
||||
),
|
||||
Content.from_function_call(
|
||||
call_id="call_1",
|
||||
name="search_hotels",
|
||||
arguments='{"city": "Paris"}',
|
||||
additional_properties={"fc_id": "fc_test456"},
|
||||
),
|
||||
],
|
||||
),
|
||||
Message(
|
||||
role="tool",
|
||||
contents=[
|
||||
Content.from_function_result(
|
||||
call_id="call_1",
|
||||
result="Found 3 hotels in Paris",
|
||||
),
|
||||
],
|
||||
),
|
||||
Message(role="assistant", contents=[Content.from_text(text="I found hotels for you")]),
|
||||
]
|
||||
|
||||
result = client._prepare_messages_for_openai(messages)
|
||||
|
||||
types = [item.get("type") for item in result]
|
||||
assert "message" in types, "User/assistant messages should be present"
|
||||
assert "reasoning" in types, "Reasoning items must be present"
|
||||
assert "function_call" in types, "Function call items must be present"
|
||||
assert "function_call_output" in types, "Function call output must be present"
|
||||
|
||||
# Verify reasoning has id
|
||||
reasoning_items = [item for item in result if item.get("type") == "reasoning"]
|
||||
assert reasoning_items[0]["id"] == "rs_test123"
|
||||
|
||||
# Verify function_call has id
|
||||
fc_items = [item for item in result if item.get("type") == "function_call"]
|
||||
assert fc_items[0]["id"] == "fc_test456"
|
||||
|
||||
# Verify correct ordering: reasoning before function_call
|
||||
reasoning_idx = types.index("reasoning")
|
||||
fc_idx = types.index("function_call")
|
||||
assert reasoning_idx < fc_idx, "Reasoning must come before function_call"
|
||||
|
||||
|
||||
def test_prepare_message_for_openai_filters_error_content() -> None:
|
||||
"""Test that error content in messages is handled properly."""
|
||||
client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user