mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
`OpenAIChatClient._inner_get_response()` reads `.headers` on the raw streaming response returned by `client.responses.with_raw_response.create(stream=True)` (and its three sibling call sites - retrieve-streaming, non-streaming create and background retrieve) to surface the `x-ms-served-model` Azure header, introduced in #5910. When `azure-ai-projects>=2.1.0` experimental GenAI tracing is enabled (`AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true`), the instrumentor wraps the raw streaming response in an inline `AsyncStreamWrapper` that exposes `.response` but not `.headers`. Reading `raw_create_response.headers` then raises `AttributeError: 'AsyncStreamWrapper' object has no attribute 'headers'`, which `FoundryChatClient` rethrows as a `ChatClientException` and breaks every streaming call (workflows and free chat). Fix: read the header dict via `getattr(raw_response, "headers", None)` at all four call sites. `_extract_served_model()` already short-circuits on `None`, so the served-model surfacing degrades gracefully (model stays the deployment alias) instead of crashing when the response is wrapped by an instrumentor that does not proxy `.headers`. Regression test added: `test_streaming_response_without_headers_attribute_does_not_crash` simulates a stream wrapper that raises `AttributeError` on `.headers` and asserts the stream still completes with the deployment alias as `update.model`. Fixes #6028 Co-authored-by: Emilien Mottet <emilien.mottet@michelin.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
efdabd56dc
commit
3db2004e49
@@ -636,7 +636,11 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
continuation_token["response_id"],
|
||||
stream=True,
|
||||
)
|
||||
served_model = self._extract_served_model(raw_stream_response.headers)
|
||||
# Read headers defensively: telemetry instrumentors (e.g. azure-ai-projects
|
||||
# experimental tracing) wrap the streaming response in objects that do not
|
||||
# proxy ``.headers``. Degrade gracefully so the served-model surfacing is
|
||||
# best-effort instead of crashing the whole call.
|
||||
served_model = self._extract_served_model(getattr(raw_stream_response, "headers", None))
|
||||
async with raw_stream_response.parse() as stream_response:
|
||||
async for chunk in stream_response:
|
||||
update = self._parse_chunk_from_openai(
|
||||
@@ -677,7 +681,8 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
raw_create_response = await client.responses.with_raw_response.create(
|
||||
stream=True, **run_options
|
||||
)
|
||||
served_model = self._extract_served_model(raw_create_response.headers)
|
||||
# See note above on ``raw_stream_response.headers``.
|
||||
served_model = self._extract_served_model(getattr(raw_create_response, "headers", None))
|
||||
async with raw_create_response.parse() as stream_response:
|
||||
async for chunk in stream_response:
|
||||
update = self._parse_chunk_from_openai(
|
||||
@@ -706,7 +711,8 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
chat_response = self._parse_response_from_openai(response, options=validated_options)
|
||||
served_model = self._extract_served_model(raw_response.headers)
|
||||
# See note above on ``raw_stream_response.headers``.
|
||||
served_model = self._extract_served_model(getattr(raw_response, "headers", None))
|
||||
if served_model is not None:
|
||||
chat_response.model = served_model
|
||||
# Once the background response completes, drop the continuation_token from
|
||||
@@ -728,7 +734,8 @@ class RawOpenAIChatClient( # type: ignore[misc]
|
||||
except Exception as ex:
|
||||
self._handle_request_error(ex)
|
||||
chat_response = self._parse_response_from_openai(response, options=validated_options)
|
||||
served_model = self._extract_served_model(raw_response.headers)
|
||||
# See note above on ``raw_stream_response.headers``.
|
||||
served_model = self._extract_served_model(getattr(raw_response, "headers", None))
|
||||
if served_model is not None:
|
||||
chat_response.model = served_model
|
||||
return chat_response
|
||||
|
||||
@@ -841,6 +841,88 @@ async def test_served_model_header_not_captured_for_streaming_text_format() -> N
|
||||
assert update.model == "test-model"
|
||||
|
||||
|
||||
async def test_streaming_response_without_headers_attribute_does_not_crash() -> None:
|
||||
"""Regression for #6028.
|
||||
|
||||
Some telemetry instrumentors (e.g. ``azure-ai-projects`` experimental GenAI tracing,
|
||||
activated by ``AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true``) monkey-patch
|
||||
``openai.resources.responses.AsyncResponses.create`` at the class level and return
|
||||
an ``AsyncStreamWrapper`` whose class genuinely has no ``headers`` attribute. The
|
||||
``with_raw_response.create`` wrapper does not re-wrap the return value
|
||||
(``async_to_raw_response_wrapper`` only injects an extra header into the request),
|
||||
so ``raw_create_response`` in ``_inner_get_response`` ends up being the wrapper
|
||||
itself. Reading ``raw_create_response.headers`` used to raise ``AttributeError``
|
||||
and bubble up as ``ChatClientException``, breaking every streaming call. The
|
||||
defensive ``getattr(..., "headers", None)`` should now degrade gracefully:
|
||||
no served-model surfacing, but the stream still completes.
|
||||
"""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
|
||||
events = [
|
||||
ResponseTextDeltaEvent(
|
||||
type="response.output_text.delta",
|
||||
content_index=0,
|
||||
item_id="text_item",
|
||||
output_index=0,
|
||||
sequence_number=1,
|
||||
logprobs=[],
|
||||
delta="Hello",
|
||||
),
|
||||
]
|
||||
|
||||
class _StreamWrapperWithoutHeaders:
|
||||
"""Mimics ``azure.ai.projects.telemetry._responses_instrumentor.AsyncStreamWrapper``:
|
||||
an async iterator that proxies the stream contents but does not expose ``.headers``.
|
||||
``hasattr(wrapper, "headers")`` returns ``False`` so ``getattr(..., "headers", None)``
|
||||
falls through to the default — matching the real instrumentor's class layout.
|
||||
"""
|
||||
|
||||
def __init__(self, events: list[object]) -> None:
|
||||
self._events = events
|
||||
self._iterator = iter(())
|
||||
|
||||
def __aiter__(self) -> "_StreamWrapperWithoutHeaders":
|
||||
self._iterator = iter(self._events)
|
||||
return self
|
||||
|
||||
async def __anext__(self) -> object:
|
||||
try:
|
||||
return next(self._iterator)
|
||||
except StopIteration as exc:
|
||||
raise StopAsyncIteration from exc
|
||||
|
||||
def parse(self) -> "_StreamWrapperWithoutHeaders":
|
||||
return self
|
||||
|
||||
async def __aenter__(self) -> "_StreamWrapperWithoutHeaders":
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc: BaseException | None,
|
||||
traceback: object | None,
|
||||
) -> None:
|
||||
return None
|
||||
|
||||
headerless_stream = _StreamWrapperWithoutHeaders(events)
|
||||
# Sanity-check the simulation: the real instrumentor's wrapper genuinely lacks ``.headers``.
|
||||
assert not hasattr(headerless_stream, "headers")
|
||||
|
||||
with (
|
||||
patch.object(client, "_prepare_request", new=AsyncMock(return_value=(client.client, {}, {}))),
|
||||
patch.object(client.client.responses, "create", new=AsyncMock(return_value=headerless_stream)),
|
||||
patch.object(client, "_get_metadata_from_response", return_value={}),
|
||||
):
|
||||
stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True)
|
||||
updates = [update async for update in stream]
|
||||
|
||||
assert updates, "Expected the stream to complete even when the wrapper lacks .headers"
|
||||
for update in updates:
|
||||
# No header => no override => model stays the deployment alias.
|
||||
assert update.model == "test-model"
|
||||
|
||||
|
||||
async def test_streaming_text_format_preserves_final_structured_output() -> None:
|
||||
"""Streaming structured output should still parse into the final ChatResponse value."""
|
||||
client = OpenAIChatClient(model="test-model", api_key="test-key")
|
||||
|
||||
Reference in New Issue
Block a user