mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Record actual served model from Azure OpenAI (#5910)
* Record actual served model as response model for Azure OpenAI * Formatting * Fix tests * Fix pipeline error * Comments * Address review: surface served model via ChatResponse.model Apply blocking review feedback from PR #5910: - Use ChatResponse.model / ChatResponseUpdate.model as the source of truth for the Azure x-ms-served-model header value, instead of stashing it in additional_properties and overriding it again in observability. Observability already reads response.model; the chat client now overwrites it post-parse when the served-model header is present. Empirically the Azure Responses API returns the deployment alias in body.model and the actual snapshot (e.g. gpt-5-nano-2025-08-07) in this header. - Move the AZURE_OPENAI_SERVED_MODEL_HEADER constant out of observability.py and into RawOpenAIChatClient (as the SERVED_MODEL_HEADER ClassVar). The header is Azure-OpenAI-Responses-API-specific so observability does not need to know about it. - Revert the streaming text_format path to client.responses.stream(...) and drop the _pydantic_model_to_text_format_param helper. That helper imported from openai.lib._parsing._responses (a private SDK path) and the swap to responses.create(stream=True) dropped client-side output_parsed for structured-output streaming. The streaming-with-text_format path is the only one that does not surface the served-model header - documented inline. - Wrap the raw streaming responses in async with so the underlying socket closes deterministically (continuation_token retrieve + create paths). - Fix the empty-string / whitespace-only header at the source by stripping in _extract_served_model and returning None when nothing remains. - Revert unrelated formatting-only churn in _skills.py and test_mcp.py. - Update unit tests to assert against chat_response.model / update.model and add an aggregated streaming assertion plus a pin that the streaming-with-text_format path does not get the header. 
Verified end-to-end against Azure OpenAI Responses API: deployment alias gpt-5-nano now reports gpt-5-nano-2025-08-07 as ChatResponse.model in both the non-streaming and streaming paths. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: preserve streaming structured output finalization Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639 Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> * refactor: name streaming response finalizer Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639 Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> * fix: capture streaming response format after prepare Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639 Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> * refactor: clarify streaming response format capture Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639 Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> * test: use public API for streaming structured output Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639 Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> * Inline the served-model header override at its two call sites The `_apply_served_model_header` helper was a 1-line wrapper around `_extract_served_model`. Inlining the `if served_model is not None: ...` matches the pattern already used in the streaming paths and folds the explanatory docstring onto `_extract_served_model` (which is now the single place that knows about the header). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Eduard van Valkenburg <eavanvalkenburg@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
3bbc81554b
commit
1b6f7d80fd
@@ -651,9 +651,7 @@ def _validate_compatibility(compatibility: str | None) -> None:
|
||||
ValueError: If the value exceeds the maximum allowed length.
|
||||
"""
|
||||
if compatibility is not None and len(compatibility) > MAX_COMPATIBILITY_LENGTH:
|
||||
raise ValueError(
|
||||
f"Skill compatibility must be {MAX_COMPATIBILITY_LENGTH} characters or fewer."
|
||||
)
|
||||
raise ValueError(f"Skill compatibility must be {MAX_COMPATIBILITY_LENGTH} characters or fewer.")
|
||||
|
||||
|
||||
def _build_skill_content(
|
||||
@@ -733,6 +731,7 @@ class InlineSkill(Skill):
|
||||
instructions="Use this skill for DB tasks.",
|
||||
)
|
||||
|
||||
|
||||
@skill.resource
|
||||
def get_schema() -> str:
|
||||
return "CREATE TABLE ..."
|
||||
@@ -2613,11 +2612,7 @@ class FileSkillsSource(SkillsSource):
|
||||
|
||||
# Reject absolute paths (check both POSIX and Windows-style roots
|
||||
# so validation is consistent regardless of the host OS)
|
||||
if (
|
||||
os.path.isabs(directory)
|
||||
or normalized.startswith("/")
|
||||
or re.match(r"^[A-Za-z]:[/\\]", directory)
|
||||
):
|
||||
if os.path.isabs(directory) or normalized.startswith("/") or re.match(r"^[A-Za-z]:[/\\]", directory):
|
||||
logger.warning(
|
||||
"Skipping directory '%s': absolute paths are not allowed.",
|
||||
directory,
|
||||
|
||||
@@ -2567,10 +2567,15 @@ async def test_shared_local_storage_cross_provider_responses_history_does_not_le
|
||||
responses_second.incomplete = None
|
||||
responses_second.output = [responses_text_item]
|
||||
|
||||
def _as_raw(resp: MagicMock) -> MagicMock:
|
||||
resp.parse = MagicMock(return_value=resp)
|
||||
resp.headers = {}
|
||||
return resp
|
||||
|
||||
with patch.object(
|
||||
responses_client.client.responses,
|
||||
responses_client.client.responses.with_raw_response,
|
||||
"create",
|
||||
side_effect=[responses_first, responses_second],
|
||||
side_effect=[_as_raw(responses_first), _as_raw(responses_second)],
|
||||
) as mock_responses_create:
|
||||
responses_result = await responses_agent.run("Find me a hotel in Paris", session=session)
|
||||
|
||||
|
||||
@@ -4227,9 +4227,7 @@ async def test_mcp_tool_call_tool_forwards_tool_list_meta():
|
||||
self.session.call_tool = AsyncMock(
|
||||
return_value=types.CallToolResult(content=[types.TextContent(type="text", text="result")])
|
||||
)
|
||||
self.session.list_prompts = AsyncMock(
|
||||
return_value=types.ListPromptsResult(prompts=[])
|
||||
)
|
||||
self.session.list_prompts = AsyncMock(return_value=types.ListPromptsResult(prompts=[]))
|
||||
|
||||
def get_mcp_client(self) -> _AsyncGeneratorContextManager[Any, None]:
|
||||
return None
|
||||
|
||||
@@ -86,12 +86,28 @@ def _with_foundry_debug() -> Any:
|
||||
return decorator
|
||||
|
||||
|
||||
def _as_raw(mock_response: MagicMock) -> MagicMock:
|
||||
"""Wrap ``mock_response`` so it looks like an OpenAI ``with_raw_response`` wrapper.
|
||||
|
||||
The chat client now calls ``responses.with_raw_response.{create,parse}`` and then
|
||||
``.parse()`` on the returned wrapper to get the actual response payload, plus
|
||||
``.headers`` to surface the ``x-ms-served-model`` Azure header.
|
||||
"""
|
||||
mock_response.parse = MagicMock(return_value=mock_response)
|
||||
mock_response.headers = {}
|
||||
return mock_response
|
||||
|
||||
|
||||
def _make_mock_openai_client() -> MagicMock:
|
||||
client = MagicMock()
|
||||
client.default_headers = {}
|
||||
client.responses = MagicMock()
|
||||
client.responses.create = AsyncMock()
|
||||
client.responses.parse = AsyncMock()
|
||||
client.responses.with_raw_response = MagicMock()
|
||||
client.responses.with_raw_response.create = AsyncMock()
|
||||
client.responses.with_raw_response.parse = AsyncMock()
|
||||
client.responses.with_raw_response.retrieve = AsyncMock()
|
||||
client.files = MagicMock()
|
||||
client.files.create = AsyncMock()
|
||||
client.files.delete = AsyncMock()
|
||||
@@ -470,7 +486,7 @@ async def test_content_filter_exception() -> None:
|
||||
body={"error": {"code": "content_filter", "message": "Content filter error"}},
|
||||
)
|
||||
mock_error.code = "content_filter"
|
||||
client.client.responses.create.side_effect = mock_error
|
||||
client.client.responses.with_raw_response.create.side_effect = mock_error
|
||||
|
||||
with pytest.raises(OpenAIContentFilterException) as exc_info:
|
||||
await client.get_response(messages=[Message(role="user", contents=["Test message"])])
|
||||
@@ -494,7 +510,7 @@ async def test_response_format_parse_path() -> None:
|
||||
mock_parsed_response.usage = None
|
||||
mock_parsed_response.finish_reason = None
|
||||
mock_parsed_response.conversation = None
|
||||
client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
|
||||
client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))
|
||||
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
@@ -522,7 +538,7 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
|
||||
mock_parsed_response.finish_reason = None
|
||||
mock_parsed_response.conversation = MagicMock()
|
||||
mock_parsed_response.conversation.id = "conversation_456"
|
||||
client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
|
||||
client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))
|
||||
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
@@ -562,7 +578,7 @@ async def test_response_format_dict_parse_path() -> None:
|
||||
mock_message_item.type = "message"
|
||||
mock_message_item.content = [mock_message_content]
|
||||
mock_response.output = [mock_message_item]
|
||||
client.client.responses.create = AsyncMock(return_value=mock_response)
|
||||
client.client.responses.with_raw_response.create = AsyncMock(return_value=_as_raw(mock_response))
|
||||
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
@@ -587,7 +603,7 @@ async def test_bad_request_error_non_content_filter() -> None:
|
||||
body={"error": {"code": "invalid_request", "message": "Invalid request"}},
|
||||
)
|
||||
mock_error.code = "invalid_request"
|
||||
client.client.responses.parse = AsyncMock(side_effect=mock_error)
|
||||
client.client.responses.with_raw_response.parse = AsyncMock(side_effect=mock_error)
|
||||
|
||||
with pytest.raises(ChatClientException) as exc_info:
|
||||
await client.get_response(
|
||||
|
||||
@@ -359,6 +359,14 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
STORES_BY_DEFAULT: ClassVar[bool] = True # type: ignore[reportIncompatibleVariableOverride, misc]
|
||||
SUPPORTS_RICH_FUNCTION_OUTPUT: ClassVar[bool] = True
|
||||
|
||||
# Azure OpenAI Responses API may include this header in responses naming the actual model that
|
||||
# served the request (e.g. ``gpt-5-nano-2025-08-07``), which can differ from the deployment alias
|
||||
# that the request was addressed to and that ``response.model`` reports. When present, we use it
|
||||
# as the value of ``ChatResponse.model`` / ``ChatResponseUpdate.model`` so telemetry and callers
|
||||
# see the actually served model. (Chat Completions API already returns the snapshot in
|
||||
# ``response.model``, so this header only matters for the Responses API.)
|
||||
SERVED_MODEL_HEADER: ClassVar[str] = "x-ms-served-model"
|
||||
|
||||
FILE_SEARCH_MAX_RESULTS: int = 50
|
||||
|
||||
@overload
|
||||
@@ -606,25 +614,40 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
function_call_ids: dict[int, tuple[str, str]] = {}
|
||||
seen_reasoning_delta_item_ids: set[str] = set()
|
||||
validated_options: dict[str, Any] | None = None
|
||||
# Captured once request options are validated/prepared so the streaming finalizer can
|
||||
# still parse the aggregated response into structured output after the stream completes.
|
||||
response_format: Any | None = None
|
||||
|
||||
def _finalize_with_captured_format(updates: Sequence[ChatResponseUpdate]) -> ChatResponse[Any]:
|
||||
# ResponseStream only calls the finalizer after iterating or draining `_stream()`,
|
||||
# so `response_format` has already been populated from the validated request state
|
||||
# unless request setup failed before streaming began.
|
||||
return self._finalize_response_updates(updates, response_format=response_format)
|
||||
|
||||
async def _stream() -> AsyncIterable[ChatResponseUpdate]:
|
||||
nonlocal validated_options
|
||||
nonlocal response_format, validated_options
|
||||
if continuation_token is not None:
|
||||
# Resume a background streaming response by retrieving with stream=True
|
||||
client = self.client
|
||||
validated_options = await self._validate_options(options)
|
||||
response_format = validated_options.get("response_format")
|
||||
try:
|
||||
stream_response = await client.responses.retrieve(
|
||||
raw_stream_response = await client.responses.with_raw_response.retrieve(
|
||||
continuation_token["response_id"],
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in stream_response:
|
||||
yield self._parse_chunk_from_openai(
|
||||
chunk,
|
||||
options=validated_options,
|
||||
function_call_ids=function_call_ids,
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
served_model = self._extract_served_model(raw_stream_response.headers)
|
||||
async with raw_stream_response.parse() as stream_response:
|
||||
async for chunk in stream_response:
|
||||
update = self._parse_chunk_from_openai(
|
||||
chunk,
|
||||
options=validated_options,
|
||||
function_call_ids=function_call_ids,
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
if served_model is not None:
|
||||
update.model = served_model
|
||||
yield update
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
else:
|
||||
@@ -633,8 +656,15 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
run_options,
|
||||
validated_options,
|
||||
) = await self._prepare_request(messages, options)
|
||||
response_format = validated_options.get("response_format")
|
||||
try:
|
||||
if "text_format" in run_options:
|
||||
# The SDK's ``responses.stream(text_format=...)`` helper preserves
|
||||
# client-side ``output_parsed`` partial parsing for structured outputs,
|
||||
# but it does not expose the raw HTTP response (no ``x-ms-served-model``
|
||||
# access). We accept that trade-off: this single streaming path keeps
|
||||
# the deployment alias as the reported model name. All other paths
|
||||
# surface the served-model header.
|
||||
async with client.responses.stream(**run_options) as response:
|
||||
async for chunk in response:
|
||||
yield self._parse_chunk_from_openai(
|
||||
@@ -644,18 +674,25 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
else:
|
||||
async for chunk in await client.responses.create(stream=True, **run_options):
|
||||
yield self._parse_chunk_from_openai(
|
||||
chunk,
|
||||
options=validated_options,
|
||||
function_call_ids=function_call_ids,
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
raw_create_response = await client.responses.with_raw_response.create(
|
||||
stream=True, **run_options
|
||||
)
|
||||
served_model = self._extract_served_model(raw_create_response.headers)
|
||||
async with raw_create_response.parse() as stream_response:
|
||||
async for chunk in stream_response:
|
||||
update = self._parse_chunk_from_openai(
|
||||
chunk,
|
||||
options=validated_options,
|
||||
function_call_ids=function_call_ids,
|
||||
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
|
||||
)
|
||||
if served_model is not None:
|
||||
update.model = served_model
|
||||
yield update
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
|
||||
response_format = validated_options.get("response_format") if validated_options else None
|
||||
return self._build_response_stream(_stream(), response_format=response_format)
|
||||
return ResponseStream(_stream(), finalizer=_finalize_with_captured_format)
|
||||
|
||||
# Non-streaming
|
||||
async def _get_response() -> ChatResponse:
|
||||
@@ -664,10 +701,14 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
client = self.client
|
||||
validated_options = await self._validate_options(options)
|
||||
try:
|
||||
response = await client.responses.retrieve(continuation_token["response_id"])
|
||||
raw_response = await client.responses.with_raw_response.retrieve(continuation_token["response_id"])
|
||||
response = raw_response.parse()
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
chat_response = self._parse_response_from_openai(response, options=validated_options)
|
||||
served_model = self._extract_served_model(raw_response.headers)
|
||||
if served_model is not None:
|
||||
chat_response.model = served_model
|
||||
# Once the background response completes, drop the continuation_token from
|
||||
# the caller's options dict. FunctionInvocationLayer reuses the same dict
|
||||
# across tool-loop iterations, so leaving it in place makes the next iteration
|
||||
@@ -680,15 +721,39 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
client, run_options, validated_options = await self._prepare_request(messages, options)
|
||||
try:
|
||||
if "text_format" in run_options:
|
||||
response = await client.responses.parse(stream=False, **run_options)
|
||||
raw_response = await client.responses.with_raw_response.parse(stream=False, **run_options) # type: ignore
|
||||
else:
|
||||
response = await client.responses.create(stream=False, **run_options)
|
||||
raw_response = await client.responses.with_raw_response.create(stream=False, **run_options) # type: ignore
|
||||
response = raw_response.parse()
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
return self._parse_response_from_openai(response, options=validated_options)
|
||||
chat_response = self._parse_response_from_openai(response, options=validated_options)
|
||||
served_model = self._extract_served_model(raw_response.headers)
|
||||
if served_model is not None:
|
||||
chat_response.model = served_model
|
||||
return chat_response
|
||||
|
||||
return _get_response()
|
||||
|
||||
@classmethod
|
||||
def _extract_served_model(cls, headers: Any) -> str | None:
|
||||
"""Return the Azure OpenAI ``x-ms-served-model`` response header value when present.
|
||||
|
||||
Azure OpenAI Responses API returns the deployment alias in ``response.model`` but the actual
|
||||
snapshot served via the ``x-ms-served-model`` response header (e.g. ``gpt-5-nano-2025-08-07``
|
||||
vs deployment alias ``gpt-5-nano``). When present, the served snapshot is the source of truth
|
||||
for observability and downstream callers. Empty/whitespace-only header values are rejected
|
||||
here so every caller can simply check ``if served_model is not None``.
|
||||
"""
|
||||
if headers is None:
|
||||
return None
|
||||
served_model = headers.get(cls.SERVED_MODEL_HEADER)
|
||||
if isinstance(served_model, str):
|
||||
stripped = served_model.strip()
|
||||
if stripped:
|
||||
return stripped
|
||||
return None
|
||||
|
||||
def _prepare_response_and_text_format(
|
||||
self,
|
||||
*,
|
||||
@@ -1429,9 +1494,10 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
props = content.additional_properties or {}
|
||||
# Local-shell variant serializes as `local_shell_call` carrying a server-issued id;
|
||||
# plain function_call_output pairs by call_id and is safe under storage.
|
||||
if (
|
||||
props.get(OPENAI_SHELL_OUTPUT_TYPE_KEY) == OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL
|
||||
and props.get(OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY)
|
||||
if props.get(
|
||||
OPENAI_SHELL_OUTPUT_TYPE_KEY
|
||||
) == OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL and props.get(
|
||||
OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY
|
||||
):
|
||||
continue
|
||||
new_args: dict[str, Any] = {}
|
||||
|
||||
@@ -72,9 +72,10 @@ class OutputStruct(BaseModel):
|
||||
|
||||
|
||||
class _FakeAsyncEventStream:
|
||||
def __init__(self, events: list[object]) -> None:
|
||||
def __init__(self, events: list[object], headers: dict[str, str] | None = None) -> None:
|
||||
self._events = events
|
||||
self._iterator = iter(())
|
||||
self._headers = headers or {}
|
||||
|
||||
def __aiter__(self) -> "_FakeAsyncEventStream":
|
||||
self._iterator = iter(self._events)
|
||||
@@ -86,6 +87,45 @@ class _FakeAsyncEventStream:
|
||||
except StopIteration as exc:
|
||||
raise StopAsyncIteration from exc
|
||||
|
||||
# The chat client now consumes the streaming response via ``with_raw_response``,
|
||||
# which returns a wrapper exposing ``.parse()`` (the underlying iterable) and
|
||||
# ``.headers``. The chat client then ``async with``-s the parsed stream so the
|
||||
# underlying socket is closed deterministically. Mimic both interfaces here so
|
||||
# test mocks remain a single object.
|
||||
def parse(self) -> "_FakeAsyncEventStream":
|
||||
return self
|
||||
|
||||
@property
|
||||
def headers(self) -> dict[str, str]:
|
||||
return self._headers
|
||||
|
||||
async def __aenter__(self) -> "_FakeAsyncEventStream":
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc: BaseException | None,
|
||||
traceback: object | None,
|
||||
) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _as_raw(mock_response: MagicMock, *, headers: dict[str, str] | None = None) -> MagicMock:
|
||||
"""Make ``mock_response`` look like an OpenAI ``with_raw_response`` wrapper.
|
||||
|
||||
The chat client now calls ``responses.with_raw_response.{create,parse,retrieve}``
|
||||
and then ``.parse()`` on the returned wrapper to get the actual response payload,
|
||||
plus ``.headers`` to surface the ``x-ms-served-model`` Azure header. Tests still
|
||||
patch the underlying ``responses.{create,parse,retrieve}`` methods (the SDK's
|
||||
raw-response wrapper internally delegates to these), so the patched return value
|
||||
is what our code unwraps. Setting ``mock_response.parse`` to return the mock
|
||||
itself lets the existing assertions on ``mock_response.id`` etc. continue to work.
|
||||
"""
|
||||
mock_response.parse = MagicMock(return_value=mock_response)
|
||||
mock_response.headers = headers or {}
|
||||
return mock_response
|
||||
|
||||
|
||||
class _FakeAsyncEventStreamContext(_FakeAsyncEventStream):
|
||||
async def __aenter__(self) -> "_FakeAsyncEventStreamContext":
|
||||
@@ -477,7 +517,7 @@ async def test_response_format_parse_path() -> None:
|
||||
mock_parsed_response.finish_reason = None
|
||||
mock_parsed_response.conversation = None # No conversation object
|
||||
|
||||
with patch.object(client.client.responses, "parse", return_value=mock_parsed_response):
|
||||
with patch.object(client.client.responses, "parse", return_value=_as_raw(mock_parsed_response)):
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
options={"response_format": OutputStruct, "store": True},
|
||||
@@ -504,7 +544,7 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
|
||||
mock_parsed_response.conversation = MagicMock()
|
||||
mock_parsed_response.conversation.id = "conversation_456"
|
||||
|
||||
with patch.object(client.client.responses, "parse", return_value=mock_parsed_response):
|
||||
with patch.object(client.client.responses, "parse", return_value=_as_raw(mock_parsed_response)):
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
options={"response_format": OutputStruct, "store": True},
|
||||
@@ -542,7 +582,7 @@ async def test_response_format_dict_parse_path() -> None:
|
||||
mock_message_item.content = [mock_message_content]
|
||||
mock_response.output = [mock_message_item]
|
||||
|
||||
with patch.object(client.client.responses, "create", return_value=mock_response):
|
||||
with patch.object(client.client.responses, "create", return_value=_as_raw(mock_response)):
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["Test message"])],
|
||||
options={"response_format": response_format},
|
||||
@@ -554,6 +594,297 @@ async def test_response_format_dict_parse_path() -> None:
|
||||
assert response.value["answer"] == "Parsed"
|
||||
|
||||
|
||||
_SERVED_MODEL_HEADER = "x-ms-served-model"
|
||||
|
||||
|
||||
async def test_served_model_header_overrides_response_model() -> None:
    """A served-model header on the raw response replaces the body's model in ``ChatResponse.model``."""
    served_snapshot = "gpt-4o-2024-08-06"
    chat_client = OpenAIChatClient(model="test-model", api_key="test-key")

    # Keyword arguments to MagicMock configure plain attributes on the mock.
    body = MagicMock(
        id="response_123",
        model="test-model",  # deployment alias returned in the body
        created_at=1000000000,
        metadata={},
        output_parsed=None,
        output=[],
        usage=None,
        finish_reason=None,
        conversation=None,
        status="completed",
    )
    raw_wrapper = _as_raw(body, headers={_SERVED_MODEL_HEADER: served_snapshot})
    user_turn = Message(role="user", contents=["Test message"])

    with patch.object(chat_client.client.responses, "create", return_value=raw_wrapper):
        result = await chat_client.get_response(messages=[user_turn])

    assert result.model == served_snapshot
|
||||
|
||||
|
||||
async def test_served_model_header_absent_keeps_response_model() -> None:
    """Without a served-model header, ``ChatResponse.model`` is taken from the response body."""
    body_model = "test-model"
    chat_client = OpenAIChatClient(model="test-model", api_key="test-key")

    # Keyword arguments to MagicMock configure plain attributes on the mock.
    body = MagicMock(
        id="response_123",
        model=body_model,
        created_at=1000000000,
        metadata={},
        output_parsed=None,
        output=[],
        usage=None,
        finish_reason=None,
        conversation=None,
        status="completed",
    )
    # With no ``headers`` argument _as_raw exposes an empty dict, i.e. no x-ms-served-model.
    raw_wrapper = _as_raw(body)
    user_turn = Message(role="user", contents=["Test message"])

    with patch.object(chat_client.client.responses, "create", return_value=raw_wrapper):
        result = await chat_client.get_response(messages=[user_turn])

    assert result.model == body_model
|
||||
|
||||
|
||||
async def test_served_model_header_empty_string_does_not_override() -> None:
    """Empty and whitespace-only header values must not overwrite the response body's model name.

    Exercises a truly empty value as well as whitespace-only ones, so both halves of the
    "empty/whitespace" contract are covered rather than only a single space.
    """
    for blank_value in ("", " ", "\t "):
        # Fresh client and mock per value so one iteration cannot influence the next.
        client = OpenAIChatClient(model="test-model", api_key="test-key")

        mock_response = MagicMock()
        mock_response.id = "response_123"
        mock_response.model = "test-model"
        mock_response.created_at = 1000000000
        mock_response.metadata = {}
        mock_response.output_parsed = None
        mock_response.output = []
        mock_response.usage = None
        mock_response.finish_reason = None
        mock_response.conversation = None
        mock_response.status = "completed"

        raw = _as_raw(mock_response, headers={_SERVED_MODEL_HEADER: blank_value})

        with patch.object(client.client.responses, "create", return_value=raw):
            response = await client.get_response(
                messages=[Message(role="user", contents=["Test message"])],
            )

        assert response.model == "test-model", f"header value {blank_value!r} overrode the body model"
|
||||
|
||||
|
||||
async def test_served_model_header_captured_on_parse_path() -> None:
    """The structured-output (``parse``) path also picks up the served-model header."""
    served_snapshot = "gpt-4o-2024-08-06"
    chat_client = OpenAIChatClient(model="test-model", api_key="test-key")

    # Keyword arguments to MagicMock configure plain attributes on the mock.
    parsed_body = MagicMock(
        id="parsed_response_123",
        text="Parsed response",
        model="test-model",
        created_at=1000000000,
        metadata={},
        output_parsed=None,
        usage=None,
        finish_reason=None,
        conversation=None,
    )
    raw_wrapper = _as_raw(parsed_body, headers={_SERVED_MODEL_HEADER: served_snapshot})
    user_turn = Message(role="user", contents=["Test message"])
    request_options = {"response_format": OutputStruct, "store": True}

    with patch.object(chat_client.client.responses, "parse", return_value=raw_wrapper):
        result = await chat_client.get_response(messages=[user_turn], options=request_options)

    assert result.model == served_snapshot
|
||||
|
||||
|
||||
async def test_served_model_header_propagated_to_streaming_updates() -> None:
    """When streaming, every yielded update carries the served-model header value as ``model``."""
    served_snapshot = "gpt-4o-2024-08-06"
    chat_client = OpenAIChatClient(model="test-model", api_key="test-key")

    # Two consecutive text deltas for the same output item.
    delta_events = [
        ResponseTextDeltaEvent(
            type="response.output_text.delta",
            content_index=0,
            item_id="text_item",
            output_index=0,
            sequence_number=sequence_number,
            logprobs=[],
            delta=text,
        )
        for sequence_number, text in enumerate(["Hello", " world"], start=1)
    ]
    raw_stream = _FakeAsyncEventStream(delta_events, headers={_SERVED_MODEL_HEADER: served_snapshot})

    prepare_stub = AsyncMock(return_value=(chat_client.client, {}, {}))
    create_stub = AsyncMock(return_value=raw_stream)
    collected = []

    with (
        patch.object(chat_client, "_prepare_request", new=prepare_stub),
        patch.object(chat_client.client.responses, "create", new=create_stub),
        patch.object(chat_client, "_get_metadata_from_response", return_value={}),
    ):
        stream = chat_client._inner_get_response(
            messages=[Message(role="user", contents=["Hi"])],
            options={},
            stream=True,
        )
        async for item in stream:
            collected.append(item)

    assert collected, "Expected at least one streaming update"
    for item in collected:
        assert item.model == served_snapshot
|
||||
|
||||
|
||||
async def test_served_model_header_aggregates_into_final_streaming_response() -> None:
    """Aggregating the updates with ``ChatResponse.from_updates`` keeps the served-model value."""
    served_snapshot = "gpt-4o-2024-08-06"
    chat_client = OpenAIChatClient(model="test-model", api_key="test-key")

    only_delta = ResponseTextDeltaEvent(
        type="response.output_text.delta",
        content_index=0,
        item_id="text_item",
        output_index=0,
        sequence_number=1,
        logprobs=[],
        delta="Hello",
    )
    raw_stream = _FakeAsyncEventStream([only_delta], headers={_SERVED_MODEL_HEADER: served_snapshot})

    prepare_stub = AsyncMock(return_value=(chat_client.client, {}, {}))
    create_stub = AsyncMock(return_value=raw_stream)
    collected = []

    with (
        patch.object(chat_client, "_prepare_request", new=prepare_stub),
        patch.object(chat_client.client.responses, "create", new=create_stub),
        patch.object(chat_client, "_get_metadata_from_response", return_value={}),
    ):
        stream = chat_client._inner_get_response(
            messages=[Message(role="user", contents=["Hi"])],
            options={},
            stream=True,
        )
        async for item in stream:
            collected.append(item)

    aggregated = ChatResponse.from_updates(collected)
    assert aggregated.model == served_snapshot
|
||||
|
||||
|
||||
async def test_served_model_header_absent_in_streaming_updates() -> None:
    """Without the served-model header, streaming updates keep the deployment alias as ``model``."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")

    text_delta = ResponseTextDeltaEvent(
        type="response.output_text.delta",
        content_index=0,
        item_id="text_item",
        output_index=0,
        sequence_number=1,
        logprobs=[],
        delta="Hello",
    )
    # No ``headers`` argument: the fake stream falls back to its default (empty) headers.
    fake_stream = _FakeAsyncEventStream([text_delta])

    prepare_request = AsyncMock(return_value=(client.client, {}, {}))
    create_call = AsyncMock(return_value=fake_stream)
    with (
        patch.object(client, "_prepare_request", new=prepare_request),
        patch.object(client.client.responses, "create", new=create_call),
        patch.object(client, "_get_metadata_from_response", return_value={}),
    ):
        stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True)
        chunks = [chunk async for chunk in stream]

    assert chunks, "Expected at least one streaming update"
    # With no header present, _parse_chunk_from_openai defaults to the client's model name,
    # so every update must carry it.
    assert [chunk.model for chunk in chunks] == ["test-model"] * len(chunks)
|
||||
|
||||
|
||||
async def test_served_model_header_not_captured_for_streaming_text_format() -> None:
    """Pin that streaming structured output does not pick up the served-model header.

    That path goes through ``responses.stream(...)`` and therefore cannot surface the
    header; the test exists so that any future change to this behavior is deliberate.
    """
    client = OpenAIChatClient(model="test-model", api_key="test-key")

    text_delta = ResponseTextDeltaEvent(
        type="response.output_text.delta",
        content_index=0,
        item_id="text_item",
        output_index=0,
        sequence_number=1,
        logprobs=[],
        delta="Hello",
    )

    # ``responses.stream(...)`` hands back an async context manager. Its headers
    # attribute does not matter here because this code path never asks for it.
    fake_stream_ctx = _FakeAsyncEventStreamContext([text_delta])

    # Supplying ``text_format`` in the prepared options selects the structured-output path.
    prepare_request = AsyncMock(return_value=(client.client, {"text_format": OutputStruct}, {}))
    with (
        patch.object(client, "_prepare_request", new=prepare_request),
        patch.object(client.client.responses, "stream", return_value=fake_stream_ctx),
        patch.object(client, "_get_metadata_from_response", return_value={}),
    ):
        stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True)
        chunks = [chunk async for chunk in stream]

    assert chunks, "Expected at least one streaming update"
    # No header override happens, so the model remains the deployment alias on every update.
    assert [chunk.model for chunk in chunks] == ["test-model"] * len(chunks)
|
||||
|
||||
|
||||
async def test_streaming_text_format_preserves_final_structured_output() -> None:
    """The final ChatResponse of a structured-output stream should still carry the parsed value."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")

    # The whole JSON payload arrives in one text delta.
    json_delta = ResponseTextDeltaEvent(
        type="response.output_text.delta",
        content_index=0,
        item_id="text_item",
        output_index=0,
        sequence_number=1,
        logprobs=[],
        delta='{"location":"Seattle","weather":"Sunny"}',
    )
    fake_stream_ctx = _FakeAsyncEventStreamContext([json_delta])

    prepared = (
        client.client,
        {"text_format": OutputStruct},
        {"response_format": OutputStruct},
    )
    prepare_request = AsyncMock(return_value=prepared)
    with (
        patch.object(client, "_prepare_request", new=prepare_request),
        patch.object(client.client.responses, "stream", return_value=fake_stream_ctx),
        patch.object(client, "_get_metadata_from_response", return_value={}),
    ):
        stream = client.get_response(
            messages=[Message(role="user", contents=["Hi"])],
            options={"response_format": OutputStruct},
            stream=True,
        )
        final_response = await stream.get_final_response()

    expected_value = OutputStruct(location="Seattle", weather="Sunny")
    assert final_response.model == "test-model"
    assert final_response.value == expected_value
|
||||
|
||||
|
||||
async def test_bad_request_error_non_content_filter() -> None:
|
||||
"""Test get_response BadRequestError without content_filter."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
@@ -953,7 +1284,9 @@ async def test_local_shell_tool_is_invoked_in_function_loop() -> None:
|
||||
mock_text_item.content = [mock_text_content]
|
||||
mock_response2.output = [mock_text_item]
|
||||
|
||||
with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
|
||||
with patch.object(
|
||||
client.client.responses, "create", side_effect=[_as_raw(mock_response1), _as_raw(mock_response2)]
|
||||
) as mock_create:
|
||||
await client.get_response(
|
||||
messages=[Message(role="user", contents=["What Python version is available?"])],
|
||||
options={"tools": [local_shell_tool]},
|
||||
@@ -1026,7 +1359,9 @@ async def test_shell_call_is_invoked_as_local_shell_function_loop() -> None:
|
||||
mock_text_item.content = [mock_text_content]
|
||||
mock_response2.output = [mock_text_item]
|
||||
|
||||
with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
|
||||
with patch.object(
|
||||
client.client.responses, "create", side_effect=[_as_raw(mock_response1), _as_raw(mock_response2)]
|
||||
) as mock_create:
|
||||
await client.get_response(
|
||||
messages=[Message(role="user", contents=["What Python version is available?"])],
|
||||
options={"tools": [local_shell_tool]},
|
||||
@@ -1097,7 +1432,9 @@ async def test_tool_loop_store_false_omits_reasoning_items_from_second_request()
|
||||
mock_text_item.content = [mock_text_content]
|
||||
mock_response2.output = [mock_text_item]
|
||||
|
||||
with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
|
||||
with patch.object(
|
||||
client.client.responses, "create", side_effect=[_as_raw(mock_response1), _as_raw(mock_response2)]
|
||||
) as mock_create:
|
||||
response = await client.get_response(
|
||||
messages=[Message(role="user", contents=["What's the weather in Amsterdam?"])],
|
||||
options={
|
||||
@@ -2810,7 +3147,9 @@ async def test_end_to_end_mcp_approval_flow(span_exporter) -> None:
|
||||
mock_response2.output = [mock_text_item]
|
||||
|
||||
# Patch the create call to return the two mocked responses in sequence
|
||||
with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
|
||||
with patch.object(
|
||||
client.client.responses, "create", side_effect=[_as_raw(mock_response1), _as_raw(mock_response2)]
|
||||
) as mock_create:
|
||||
# First call: get the approval request
|
||||
response = await client.get_response(messages=[Message(role="user", contents=["Trigger approval"])])
|
||||
assert response.messages[0].contents[0].type == "function_approval_request"
|
||||
@@ -4120,9 +4459,7 @@ async def test_prepare_options_with_conversation_id_strips_server_items_for_mixe
|
||||
types = [item.get("type") for item in options["input"]]
|
||||
assert "reasoning" not in types
|
||||
assert "function_call" not in types
|
||||
output_call_ids = {
|
||||
item["call_id"] for item in options["input"] if item.get("type") == "function_call_output"
|
||||
}
|
||||
output_call_ids = {item["call_id"] for item in options["input"] if item.get("type") == "function_call_output"}
|
||||
assert output_call_ids == {"call_history", "call_live"}
|
||||
assert options["previous_response_id"] == "resp_prev123"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user