Python: Fix file_search citations breaking assistant history roundtrip (#5557)

* Python: Fix file_search citations breaking assistant history roundtrip

The Responses API rejects 'input_file' inside an assistant message, but the
SDK was emitting it whenever an assistant Message contained a hosted_file
content (which is what file_search citations become). Three coordinated fixes:

1. _prepare_content_for_openai now skips hosted_file for the assistant role
   instead of mapping to input_file (which the API rejects there).

2. The streaming response.output_text.annotation.added handler attaches
   file_citation, container_file_citation, and file_path as annotations on
   text content, matching the non-streaming path. Previously streaming
   produced standalone HostedFileContent items that always tripped (1).

3. output_text serialization preserves Annotation objects on roundtrip via a
   new _annotations_to_output_text helper instead of hardcoding 'annotations'
   to []. file_search citations now survive multi-agent forwarding.

Closes #5556.

* Address PR review

- _annotations_to_output_text: fan out one entry per annotated_region for
  url_citation/container_file_citation (Annotation.annotated_regions is a
  Sequence; the API form carries one start/end per entry).
- Validate region span bounds are ints before emitting; skip otherwise.
- Add test for the file_path branch (annotation with file_id only).
- Add test verifying streamed citation events coalesce onto surrounding
  text via _finalize_response so span indices reference the merged text,
  not the empty-text streaming carrier.
This commit is contained in:
Evan Mattson
2026-04-29 16:38:19 +09:00
committed by GitHub
Unverified
parent 094f9903b3
commit 46ab47b9e1
2 changed files with 450 additions and 49 deletions
@@ -241,6 +241,85 @@ OpenAIChatOptionsT = TypeVar(
# endregion
# region Helpers
def _annotations_to_output_text(annotations: Sequence[Annotation] | None) -> list[dict[str, Any]]:
"""Convert framework `Annotation` objects to Responses API `output_text` annotation dicts.
Citations from `file_search`, `code_interpreter` file paths, and url citations all collapse
to `Annotation(type="citation", ...)` in the framework. The original API form is recovered
here so assistant messages roundtrip cleanly through history forwarding.
Each Responses API annotation dict carries at most one `start_index`/`end_index` pair, so an
`Annotation` with multiple `annotated_regions` is fanned out into one entry per region.
Regions missing valid integer span bounds are skipped.
"""
if not annotations:
return []
out: list[dict[str, Any]] = []
for annotation in annotations:
if annotation.get("type") != "citation":
continue
props = annotation.get("additional_properties") or {}
regions = annotation.get("annotated_regions") or []
file_id = annotation.get("file_id")
url = annotation.get("url")
title = annotation.get("title")
container_id = props.get("container_id")
if container_id and file_id:
for region in regions:
start = region.get("start_index")
end = region.get("end_index")
if not (isinstance(start, int) and isinstance(end, int)):
continue
entry: dict[str, Any] = {
"type": "container_file_citation",
"container_id": container_id,
"file_id": file_id,
"start_index": start,
"end_index": end,
}
if url:
entry["filename"] = url
out.append(entry)
elif url and not file_id and regions:
for region in regions:
start = region.get("start_index")
end = region.get("end_index")
if not (isinstance(start, int) and isinstance(end, int)):
continue
out.append({
"type": "url_citation",
"url": url,
"title": title or "",
"start_index": start,
"end_index": end,
})
elif file_id and url:
entry = {
"type": "file_citation",
"file_id": file_id,
"filename": url,
}
if (idx := props.get("index")) is not None:
entry["index"] = idx
out.append(entry)
elif file_id:
entry = {
"type": "file_path",
"file_id": file_id,
}
if (idx := props.get("index")) is not None:
entry["index"] = idx
out.append(entry)
return out
# endregion
# region ResponsesClient
@@ -1374,7 +1453,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
return {
"type": "output_text",
"text": content.text,
"annotations": [],
"annotations": _annotations_to_output_text(getattr(content, "annotations", None)),
}
return {
"type": "input_text",
@@ -1522,6 +1601,13 @@ class RawOpenAIChatClient( # type: ignore[misc]
"approve": content.approved,
}
case "hosted_file":
# `input_file` is an input-only content type in the Responses API and is rejected
# inside an assistant message. Hosted-file content on an assistant message
# represents a citation produced by a hosted tool (e.g., file_search) and cannot be
# meaningfully replayed as input — drop it. The accompanying text annotations carry
# the citation context for round-tripping.
if role == "assistant":
return {}
return {
"type": "input_file",
"file_id": content.file_id,
@@ -2502,45 +2588,63 @@ class RawOpenAIChatClient( # type: ignore[misc]
ann_type = _get_ann_value("type")
ann_file_id = _get_ann_value("file_id")
# Hosted-file citations attach as text annotations (matching the non-streaming path)
# so they don't roundtrip as standalone `input_file` items in assistant history.
if ann_type == "file_path":
if ann_file_id:
annotation_obj = Annotation(
type="citation",
file_id=str(ann_file_id),
additional_properties={
"annotation_index": event.annotation_index,
"index": _get_ann_value("index"),
},
raw_representation=annotation,
)
contents.append(
Content.from_hosted_file(
file_id=str(ann_file_id),
additional_properties={
"annotation_index": event.annotation_index,
"index": _get_ann_value("index"),
},
raw_representation=event,
)
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
)
elif ann_type == "file_citation":
if ann_file_id:
ann_filename = _get_ann_value("filename")
annotation_obj = Annotation(
type="citation",
file_id=str(ann_file_id),
url=ann_filename,
additional_properties={
"annotation_index": event.annotation_index,
"index": _get_ann_value("index"),
},
raw_representation=annotation,
)
contents.append(
Content.from_hosted_file(
file_id=str(ann_file_id),
additional_properties={
"annotation_index": event.annotation_index,
"filename": _get_ann_value("filename"),
"index": _get_ann_value("index"),
},
raw_representation=event,
)
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
)
elif ann_type == "container_file_citation":
if ann_file_id:
ann_filename = _get_ann_value("filename")
ann_start = _get_ann_value("start_index")
ann_end = _get_ann_value("end_index")
annotation_obj = Annotation(
type="citation",
file_id=str(ann_file_id),
url=ann_filename,
additional_properties={
"annotation_index": event.annotation_index,
"container_id": _get_ann_value("container_id"),
},
raw_representation=annotation,
)
if ann_start is not None and ann_end is not None:
annotation_obj["annotated_regions"] = [
TextSpanRegion(
type="text_span",
start_index=ann_start,
end_index=ann_end,
)
]
contents.append(
Content.from_hosted_file(
file_id=str(ann_file_id),
additional_properties={
"annotation_index": event.annotation_index,
"container_id": _get_ann_value("container_id"),
"filename": _get_ann_value("filename"),
"start_index": _get_ann_value("start_index"),
"end_index": _get_ann_value("end_index"),
},
raw_representation=event,
)
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
)
elif ann_type == "url_citation":
ann_url = _get_ann_value("url")
@@ -1914,6 +1914,285 @@ def test_hosted_file_content_preparation() -> None:
assert result["file_id"] == "file_abc123"
def test_assistant_text_preserves_citation_annotations_on_roundtrip() -> None:
"""Citation annotations on assistant text should survive serialization back to the Responses API.
Previously `output_text.annotations` was hardcoded to `[]`, silently dropping `file_search`
citation context on every roundtrip. Preserving them keeps citations intact across
multi-agent forwarding.
"""
from agent_framework._types import Annotation, TextSpanRegion
client = OpenAIChatClient(model="test-model", api_key="test-key")
text_content = Content.from_text(
"Per the docs, the answer is X. See also the report.",
annotations=[
Annotation(
type="citation",
file_id="file-abc123",
url="guidelines.md",
additional_properties={"index": 12},
),
Annotation(
type="citation",
title="Quarterly Report",
url="https://example.com/report",
annotated_regions=[TextSpanRegion(type="text_span", start_index=40, end_index=46)],
),
Annotation(
type="citation",
file_id="file-container456",
url="data.csv",
additional_properties={"container_id": "container-789"},
annotated_regions=[TextSpanRegion(type="text_span", start_index=0, end_index=3)],
),
],
)
result = client._prepare_content_for_openai("assistant", text_content)
assert result["type"] == "output_text"
annotations = result["annotations"]
assert len(annotations) == 3
file_citation = next(a for a in annotations if a["type"] == "file_citation")
assert file_citation["file_id"] == "file-abc123"
assert file_citation["filename"] == "guidelines.md"
assert file_citation["index"] == 12
url_citation = next(a for a in annotations if a["type"] == "url_citation")
assert url_citation["url"] == "https://example.com/report"
assert url_citation["title"] == "Quarterly Report"
assert url_citation["start_index"] == 40
assert url_citation["end_index"] == 46
container = next(a for a in annotations if a["type"] == "container_file_citation")
assert container["file_id"] == "file-container456"
assert container["container_id"] == "container-789"
assert container["filename"] == "data.csv"
assert container["start_index"] == 0
assert container["end_index"] == 3
def test_assistant_text_preserves_file_path_annotation() -> None:
"""A `file_path`-style citation (file_id only, no url) should serialize as `file_path`."""
from agent_framework._types import Annotation
client = OpenAIChatClient(model="test-model", api_key="test-key")
text_content = Content.from_text(
"See attached.",
annotations=[
Annotation(
type="citation",
file_id="file-only",
additional_properties={"index": 42},
),
],
)
result = client._prepare_content_for_openai("assistant", text_content)
assert result["type"] == "output_text"
annotations = result["annotations"]
assert annotations == [{"type": "file_path", "file_id": "file-only", "index": 42}]
def test_assistant_text_fans_out_multiple_annotated_regions() -> None:
"""A url_citation with multiple `annotated_regions` should emit one entry per region.
The Responses API annotation dict carries one start/end pair, so a framework Annotation
with N regions must produce N output annotation entries.
"""
from agent_framework._types import Annotation, TextSpanRegion
client = OpenAIChatClient(model="test-model", api_key="test-key")
text_content = Content.from_text(
"See report. The report says X. Also report.",
annotations=[
Annotation(
type="citation",
title="Report",
url="https://example.com/report",
annotated_regions=[
TextSpanRegion(type="text_span", start_index=4, end_index=10),
TextSpanRegion(type="text_span", start_index=16, end_index=22),
TextSpanRegion(type="text_span", start_index=36, end_index=42),
],
),
],
)
result = client._prepare_content_for_openai("assistant", text_content)
annotations = result["annotations"]
assert len(annotations) == 3
assert all(a["type"] == "url_citation" for a in annotations)
spans = [(a["start_index"], a["end_index"]) for a in annotations]
assert spans == [(4, 10), (16, 22), (36, 42)]
def test_assistant_text_skips_regions_with_invalid_span() -> None:
"""Regions missing integer start/end bounds are skipped rather than emitted with `None`."""
from agent_framework._types import Annotation, TextSpanRegion
client = OpenAIChatClient(model="test-model", api_key="test-key")
text_content = Content.from_text(
"See report.",
annotations=[
Annotation(
type="citation",
title="Report",
url="https://example.com/report",
annotated_regions=[
TextSpanRegion(type="text_span"), # type: ignore[typeddict-item]
TextSpanRegion(type="text_span", start_index=4, end_index=10),
],
),
],
)
result = client._prepare_content_for_openai("assistant", text_content)
annotations = result["annotations"]
assert len(annotations) == 1
assert annotations[0]["start_index"] == 4
assert annotations[0]["end_index"] == 10
def test_assistant_text_without_annotations_emits_empty_list() -> None:
"""Plain assistant text should still emit `annotations: []` (Azure validation requires the field)."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
result = client._prepare_content_for_openai("assistant", Content.from_text("hello"))
assert result["type"] == "output_text"
assert result["text"] == "hello"
assert result["annotations"] == []
def test_streamed_file_citation_coalesces_onto_surrounding_text() -> None:
"""Streamed citation events emit empty-text Content with annotations; `_finalize_response`
coalesces consecutive text contents and unions their annotations, so the citation lands on
the merged assistant text content (not a stray empty-text entry).
Without this, span indices in the annotation would reference `text == ""` after roundtrip.
"""
text_event = MagicMock()
text_event.type = "response.output_text.delta"
text_event.delta = "Hello world."
text_event.item_id = "item_1"
text_event.output_index = 0
text_event.content_index = 0
citation_event = MagicMock()
citation_event.type = "response.output_text.annotation.added"
citation_event.annotation_index = 0
citation_event.annotation = {
"type": "file_citation",
"file_id": "file-abc",
"filename": "guidelines.md",
"index": 5,
}
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
update1 = client._parse_chunk_from_openai(text_event, chat_options, function_call_ids)
update2 = client._parse_chunk_from_openai(citation_event, chat_options, function_call_ids)
response = ChatResponse.from_updates([update1, update2])
assert len(response.messages) == 1
contents = response.messages[0].contents
assert len(contents) == 1
merged = contents[0]
assert merged.type == "text"
assert merged.text == "Hello world."
assert merged.annotations is not None
assert len(merged.annotations) == 1
assert merged.annotations[0]["file_id"] == "file-abc"
def test_streamed_file_citation_roundtrips_as_assistant_history() -> None:
"""End-to-end: file_citation arrives via streaming, then gets forwarded as assistant history.
Reproduces the user-reported sequential/group-chat workflow bug where one agent's
`file_search` citations became `input_file` items in the next agent's request and were
rejected by the Responses API.
"""
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
text_event = MagicMock()
text_event.type = "response.output_text.delta"
text_event.delta = "According to the docs, the answer is X."
text_event.item_id = "item_1"
text_event.output_index = 0
text_event.content_index = 0
citation_event = MagicMock()
citation_event.type = "response.output_text.annotation.added"
citation_event.annotation_index = 0
citation_event.annotation = {
"type": "file_citation",
"file_id": "file-xyz789",
"filename": "guidelines.md",
"index": 12,
}
update1 = client._parse_chunk_from_openai(text_event, chat_options, function_call_ids)
update2 = client._parse_chunk_from_openai(citation_event, chat_options, function_call_ids)
assistant_history = Message(
role="assistant",
contents=[*update1.contents, *update2.contents],
)
prepared = client._prepare_message_for_openai(assistant_history)
assert len(prepared) == 1
content_items = prepared[0].get("content", [])
types = [c.get("type") for c in content_items]
assert "input_file" not in types, f"input_file leaked into assistant history: {types}"
output_text_items = [c for c in content_items if c.get("type") == "output_text"]
assert any(
any(a.get("type") == "file_citation" and a.get("file_id") == "file-xyz789" for a in c.get("annotations", []))
for c in output_text_items
), "file_citation annotation should survive the streaming → history roundtrip"
def test_hosted_file_in_assistant_message_does_not_emit_input_file() -> None:
"""Hosted file citations attached to an assistant message must not roundtrip as `input_file`.
The Responses API rejects `input_file` items inside an assistant role's content array;
`input_file` is an input-only content type. This guards the multi-agent / sequential workflow
case where one agent's `file_search` citations get forwarded as history to the next call.
"""
client = OpenAIChatClient(model="test-model", api_key="test-key")
assistant_msg = Message(
role="assistant",
contents=[
Content.from_text("According to the docs, the answer is X."),
Content.from_hosted_file(file_id="file_abc123"),
],
)
prepared = client._prepare_message_for_openai(assistant_msg)
assert len(prepared) == 1
assistant_item = prepared[0]
assert assistant_item["role"] == "assistant"
content_types = [c.get("type") for c in assistant_item.get("content", [])]
assert "input_file" not in content_types, (
f"`input_file` is not valid inside an assistant message; got {content_types}"
)
assert "output_text" in content_types
def test_function_approval_response_with_mcp_tool_call() -> None:
"""Test function approval response content with MCP server tool call content."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -2682,7 +2961,7 @@ def test_streaming_response_in_progress_type() -> None:
def test_streaming_annotation_added_with_file_path() -> None:
"""Test streaming annotation added event with file_path type extracts HostedFileContent."""
"""Streaming `file_path` should attach as a text annotation, matching non-streaming."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
@@ -2700,15 +2979,23 @@ def test_streaming_annotation_added_with_file_path() -> None:
assert len(response.contents) == 1
content = response.contents[0]
assert content.type == "hosted_file"
assert content.file_id == "file-abc123"
assert content.additional_properties is not None
assert content.additional_properties.get("annotation_index") == 0
assert content.additional_properties.get("index") == 42
assert content.type == "text"
assert content.annotations is not None
assert len(content.annotations) == 1
annotation = content.annotations[0]
assert annotation["type"] == "citation"
assert annotation["file_id"] == "file-abc123"
assert annotation["additional_properties"]["annotation_index"] == 0
assert annotation["additional_properties"]["index"] == 42
def test_streaming_annotation_added_with_file_citation() -> None:
"""Test streaming annotation added event with file_citation type extracts HostedFileContent."""
"""Streaming `file_citation` should attach as a text annotation, matching non-streaming.
Previously the streaming path produced a standalone `HostedFileContent`, which then
serialized as `input_file` in assistant history and was rejected by the Responses API.
Annotations on text content roundtrip cleanly.
"""
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
@@ -2727,15 +3014,19 @@ def test_streaming_annotation_added_with_file_citation() -> None:
assert len(response.contents) == 1
content = response.contents[0]
assert content.type == "hosted_file"
assert content.file_id == "file-xyz789"
assert content.additional_properties is not None
assert content.additional_properties.get("filename") == "sample.txt"
assert content.additional_properties.get("index") == 15
assert content.type == "text"
assert content.annotations is not None
assert len(content.annotations) == 1
annotation = content.annotations[0]
assert annotation["type"] == "citation"
assert annotation["file_id"] == "file-xyz789"
assert annotation["url"] == "sample.txt"
assert annotation["additional_properties"]["annotation_index"] == 1
assert annotation["additional_properties"]["index"] == 15
def test_streaming_annotation_added_with_container_file_citation() -> None:
"""Test streaming annotation added event with container_file_citation type."""
"""Streaming `container_file_citation` should attach as a text annotation."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
chat_options = ChatOptions()
function_call_ids: dict[int, tuple[str, str]] = {}
@@ -2756,13 +3047,19 @@ def test_streaming_annotation_added_with_container_file_citation() -> None:
assert len(response.contents) == 1
content = response.contents[0]
assert content.type == "hosted_file"
assert content.file_id == "file-container123"
assert content.additional_properties is not None
assert content.additional_properties.get("container_id") == "container-456"
assert content.additional_properties.get("filename") == "data.csv"
assert content.additional_properties.get("start_index") == 10
assert content.additional_properties.get("end_index") == 50
assert content.type == "text"
assert content.annotations is not None
assert len(content.annotations) == 1
annotation = content.annotations[0]
assert annotation["type"] == "citation"
assert annotation["file_id"] == "file-container123"
assert annotation["url"] == "data.csv"
assert annotation["additional_properties"]["container_id"] == "container-456"
assert annotation["annotated_regions"] is not None
assert len(annotation["annotated_regions"]) == 1
region = annotation["annotated_regions"][0]
assert region["start_index"] == 10
assert region["end_index"] == 50
def test_streaming_annotation_added_with_url_citation() -> None: