Python: Handle url_citation annotations in FoundryChatClient streaming responses (#5071)

* Fix url_citation annotations dropped in streaming (#5029)

Add url_citation branch to the streaming annotation handler in
_parse_chunk_from_openai, mirroring the existing non-streaming path.
The handler creates an Annotation with type='citation', title, url,
and annotated_regions (TextSpanRegion), wrapped in Content.from_text.

Update test_streaming_annotation_added_with_unknown_type to use a
truly unknown type, and add new tests for url_citation (with and
without url).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address review feedback for #5029: Python: [Bug]: url_citation annotations silently dropped in Foundry streaming (SharePoint grounding citations lost)

---------

Co-authored-by: Copilot <copilot@github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
This commit is contained in:
Giles Odigwe
2026-04-16 02:33:04 -07:00
committed by GitHub
Unverified
parent 52d50be9e0
commit 435c66e9c9
2 changed files with 108 additions and 3 deletions
@@ -2474,6 +2474,29 @@ class RawOpenAIChatClient( # type: ignore[misc]
raw_representation=event,
)
)
elif ann_type == "url_citation":
ann_url = _get_ann_value("url")
if ann_url:
ann_start = _get_ann_value("start_index")
ann_end = _get_ann_value("end_index")
annotation_obj = Annotation(
type="citation",
title=_get_ann_value("title") or "",
url=str(ann_url),
additional_properties={"annotation_index": event.annotation_index},
raw_representation=annotation,
)
if ann_start is not None and ann_end is not None:
annotation_obj["annotated_regions"] = [
TextSpanRegion(
type="text_span",
start_index=ann_start,
end_index=ann_end,
)
]
contents.append(
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
)
else:
logger.debug("Unparsed annotation type in streaming: %s", ann_type)
case "response.output_item.done":
@@ -2570,8 +2570,65 @@ def test_streaming_annotation_added_with_container_file_citation() -> None:
assert content.additional_properties.get("end_index") == 50
def test_streaming_annotation_added_with_unknown_type() -> None:
"""Test streaming annotation added event with unknown type is ignored."""
def test_streaming_annotation_added_with_url_citation() -> None:
    """A streamed url_citation annotation is parsed into a citation annotation on a text content."""
    citation_url = "https://example.sharepoint.com/sites/my-site/doc.pdf"

    # Build the streaming event that carries a fully populated url_citation.
    stream_event = MagicMock()
    stream_event.type = "response.output_text.annotation.added"
    stream_event.annotation_index = 0
    stream_event.annotation = {
        "type": "url_citation",
        "url": citation_url,
        "title": "doc.pdf",
        "start_index": 100,
        "end_index": 112,
    }

    client = OpenAIChatClient(model="test-model", api_key="test-key")
    pending_calls: dict[int, tuple[str, str]] = {}
    update = client._parse_chunk_from_openai(stream_event, ChatOptions(), pending_calls)

    # Exactly one text content carrying exactly one annotation is expected.
    assert len(update.contents) == 1
    text_content = update.contents[0]
    assert text_content.type == "text"
    assert text_content.annotations is not None
    assert len(text_content.annotations) == 1

    # The annotation exposes the citation fields and keeps the raw payload.
    citation = text_content.annotations[0]
    assert citation["type"] == "citation"
    assert citation["title"] == "doc.pdf"
    assert citation["url"] == citation_url
    assert citation["additional_properties"]["annotation_index"] == 0
    assert citation["raw_representation"] == stream_event.annotation

    # start_index/end_index are surfaced as a single text-span region.
    regions = citation["annotated_regions"]
    assert regions is not None
    assert len(regions) == 1
    span = regions[0]
    assert span["type"] == "text_span"
    assert span["start_index"] == 100
    assert span["end_index"] == 112
def test_streaming_annotation_added_with_url_citation_no_url() -> None:
    """A streamed url_citation annotation that lacks a url produces no content."""
    # The annotation payload deliberately omits the "url" key.
    stream_event = MagicMock()
    stream_event.type = "response.output_text.annotation.added"
    stream_event.annotation_index = 0
    stream_event.annotation = {
        "type": "url_citation",
        "title": "doc.pdf",
    }

    client = OpenAIChatClient(model="test-model", api_key="test-key")
    pending_calls: dict[int, tuple[str, str]] = {}
    update = client._parse_chunk_from_openai(stream_event, ChatOptions(), pending_calls)

    assert len(update.contents) == 0
def test_streaming_annotation_added_with_url_citation_no_indices() -> None:
"""Test streaming annotation with url_citation that has url but no start_index/end_index."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
@@ -2582,11 +2639,36 @@ def test_streaming_annotation_added_with_unknown_type() -> None:
mock_event.annotation = {
"type": "url_citation",
"url": "https://example.com",
"title": "Example",
}
response = client._parse_chunk_from_openai(mock_event, chat_options, function_call_ids)
assert len(response.contents) == 1
annotation = response.contents[0].annotations[0]
assert annotation["type"] == "citation"
assert annotation["title"] == "Example"
assert annotation["url"] == "https://example.com"
assert annotation["additional_properties"]["annotation_index"] == 0
assert "annotated_regions" not in annotation
def test_streaming_annotation_added_with_unknown_type() -> None:
    """A streamed annotation whose type is not recognised produces no content."""
    # Use a type name that no annotation handler is expected to know about.
    stream_event = MagicMock()
    stream_event.type = "response.output_text.annotation.added"
    stream_event.annotation_index = 0
    stream_event.annotation = {
        "type": "some_future_annotation_type",
        "data": "test",
    }

    client = OpenAIChatClient(model="test-model", api_key="test-key")
    pending_calls: dict[int, tuple[str, str]] = {}
    update = client._parse_chunk_from_openai(stream_event, ChatOptions(), pending_calls)

    # Unrecognised annotation types must not produce any content.
    assert len(update.contents) == 0