Python: Record actual served model from Azure OpenAI (#5910)

* Record actual served model as response model for Azure OpenAI

* Formatting

* Fix tests

* Fix pipeline error

* Comments

* Address review: surface served model via ChatResponse.model

Apply blocking review feedback from PR #5910:

- Use ChatResponse.model / ChatResponseUpdate.model as the source of truth for the Azure x-ms-served-model header value, instead of stashing it in additional_properties and overriding it again in observability. Observability already reads response.model; the chat client now overwrites it post-parse when the served-model header is present. Empirically the Azure Responses API returns the deployment alias in body.model and the actual snapshot (e.g. gpt-5-nano-2025-08-07) in this header.
- Move the AZURE_OPENAI_SERVED_MODEL_HEADER constant out of observability.py and into RawOpenAIChatClient (as the SERVED_MODEL_HEADER ClassVar). The header is Azure-OpenAI-Responses-API-specific so observability does not need to know about it.
- Revert the streaming text_format path to client.responses.stream(...) and drop the _pydantic_model_to_text_format_param helper. That helper imported from openai.lib._parsing._responses (a private SDK path) and the swap to responses.create(stream=True) dropped client-side output_parsed for structured-output streaming. The streaming-with-text_format path is the only one that does not surface the served-model header - documented inline.
- Wrap the raw streaming responses in async with so the underlying socket closes deterministically (continuation_token retrieve + create paths).
- Fix the empty-string / whitespace-only header at the source by stripping in _extract_served_model and returning None when nothing remains.
- Revert unrelated formatting-only churn in _skills.py and test_mcp.py.
- Update unit tests to assert against chat_response.model / update.model and add an aggregated streaming assertion plus a pin that the streaming-with-text_format path does not get the header.
Verified end-to-end against Azure OpenAI Responses API: deployment alias gpt-5-nano now reports gpt-5-nano-2025-08-07 as ChatResponse.model in both the non-streaming and streaming paths.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: preserve streaming structured output finalization

Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>

* refactor: name streaming response finalizer

Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>

* fix: capture streaming response format after prepare

Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>

* refactor: clarify streaming response format capture

Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>

* test: use public API for streaming structured output

Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/f62076ef-558d-49e8-8fe2-f38d527c9639

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>

* Inline the served-model header override at its two call sites

The `_apply_served_model_header` helper was a 1-line wrapper around `_extract_served_model`. Inlining the `if served_model is not None: ...` matches the pattern already used in the streaming paths and folds the explanatory docstring onto `_extract_served_model` (which is now the single place that knows about the header).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Eduard van Valkenburg <eavanvalkenburg@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
2026-06-16 21:04:09 +08:00 · 2026-05-18 23:38:53 -07:00
parent 3bbc81554b
commit 1b6f7d80fd
6 changed files with 471 additions and 54 deletions
@@ -86,12 +86,28 @@ def _with_foundry_debug() -> Any:
    return decorator


+def _as_raw(mock_response: MagicMock) -> MagicMock:
+    """Wrap ``mock_response`` so it looks like an OpenAI ``with_raw_response`` wrapper.
+
+    The chat client now calls ``responses.with_raw_response.{create,parse}`` and then
+    ``.parse()`` on the returned wrapper to get the actual response payload, plus
+    ``.headers`` to surface the ``x-ms-served-model`` Azure header.
+    """
+    mock_response.parse = MagicMock(return_value=mock_response)  # parse() hands back this same mock, so it doubles as wrapper and payload
+    mock_response.headers = {}  # empty by default: no x-ms-served-model entry unless a test adds one afterwards
+    return mock_response
+
+
 def _make_mock_openai_client() -> MagicMock:
    client = MagicMock()
    client.default_headers = {}
    client.responses = MagicMock()
    client.responses.create = AsyncMock()
    client.responses.parse = AsyncMock()
+    client.responses.with_raw_response = MagicMock()
+    client.responses.with_raw_response.create = AsyncMock()
+    client.responses.with_raw_response.parse = AsyncMock()
+    client.responses.with_raw_response.retrieve = AsyncMock()
    client.files = MagicMock()
    client.files.create = AsyncMock()
    client.files.delete = AsyncMock()
@@ -470,7 +486,7 @@ async def test_content_filter_exception() -> None:
        body={"error": {"code": "content_filter", "message": "Content filter error"}},
    )
    mock_error.code = "content_filter"
-    client.client.responses.create.side_effect = mock_error
+    client.client.responses.with_raw_response.create.side_effect = mock_error

    with pytest.raises(OpenAIContentFilterException) as exc_info:
        await client.get_response(messages=[Message(role="user", contents=["Test message"])])
@@ -494,7 +510,7 @@ async def test_response_format_parse_path() -> None:
    mock_parsed_response.usage = None
    mock_parsed_response.finish_reason = None
    mock_parsed_response.conversation = None
-    client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
+    client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))

    response = await client.get_response(
        messages=[Message(role="user", contents=["Test message"])],
@@ -522,7 +538,7 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
    mock_parsed_response.finish_reason = None
    mock_parsed_response.conversation = MagicMock()
    mock_parsed_response.conversation.id = "conversation_456"
-    client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
+    client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))

    response = await client.get_response(
        messages=[Message(role="user", contents=["Test message"])],
@@ -562,7 +578,7 @@ async def test_response_format_dict_parse_path() -> None:
    mock_message_item.type = "message"
    mock_message_item.content = [mock_message_content]
    mock_response.output = [mock_message_item]
-    client.client.responses.create = AsyncMock(return_value=mock_response)
+    client.client.responses.with_raw_response.create = AsyncMock(return_value=_as_raw(mock_response))

    response = await client.get_response(
        messages=[Message(role="user", contents=["Test message"])],
@@ -587,7 +603,7 @@ async def test_bad_request_error_non_content_filter() -> None:
        body={"error": {"code": "invalid_request", "message": "Invalid request"}},
    )
    mock_error.code = "invalid_request"
-    client.client.responses.parse = AsyncMock(side_effect=mock_error)
+    client.client.responses.with_raw_response.parse = AsyncMock(side_effect=mock_error)

    with pytest.raises(ChatClientException) as exc_info:
        await client.get_response(