Python: Fix reasoning replay when store=False (#5250)

* fix reasoning content when store=False

* Remove accidental worktree entries

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* remove local session sample

* removed left over files

* Add attribution override regression test

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Eduard van Valkenburg
2026-04-15 19:30:12 +02:00
committed by GitHub
Unverified
parent 485af07b8c
commit 2b251d904f
2 changed files with 301 additions and 30 deletions
@@ -1161,7 +1161,16 @@ class RawOpenAIChatClient( # type: ignore[misc]
# First turn: prepend instructions as system message
messages = prepend_instructions_to_messages(list(messages), instructions, role="system")
# Continuation turn: instructions already exist in conversation context, skip prepending
request_input = self._prepare_messages_for_openai(messages)
request_uses_service_side_storage = False
for key in ("conversation_id", "previous_response_id", "conversation"):
value = options.get(key)
if isinstance(value, str) and value:
request_uses_service_side_storage = True
break
request_input = self._prepare_messages_for_openai(
messages,
request_uses_service_side_storage=request_uses_service_side_storage,
)
if not request_input:
raise ChatClientInvalidRequestException("Messages are required for chat completions")
conversation_id = options.get("conversation_id")
@@ -1235,7 +1244,12 @@ class RawOpenAIChatClient( # type: ignore[misc]
raise ValueError("model must be a non-empty string")
options["model"] = self.model
def _prepare_messages_for_openai(self, chat_messages: Sequence[Message]) -> list[dict[str, Any]]:
def _prepare_messages_for_openai(
self,
chat_messages: Sequence[Message],
*,
request_uses_service_side_storage: bool = True,
) -> list[dict[str, Any]]:
"""Prepare the chat messages for a request.
Allowing customization of the key names for role/author, and optionally overriding the role.
@@ -1248,31 +1262,27 @@ class RawOpenAIChatClient( # type: ignore[misc]
Args:
chat_messages: The chat history to prepare.
request_uses_service_side_storage: Whether this request continues a service-managed
response/conversation and can safely reference service-scoped response items.
Returns:
The prepared chat messages for a request.
"""
list_of_list = [self._prepare_message_for_openai(message) for message in chat_messages]
list_of_list = [
self._prepare_message_for_openai(
message,
request_uses_service_side_storage=request_uses_service_side_storage,
)
for message in chat_messages
]
# Flatten the list of lists into a single list
return list(chain.from_iterable(list_of_list))
@staticmethod
def _message_replays_provider_context(message: Message) -> bool:
"""Return whether the message came from provider-attributed replay context.
Responses ``fc_id`` values are response-scoped and only valid while replaying
the same live tool loop. Once a message comes back through a context provider
(for example, loaded session history), that message is historical input and
must not reuse the original response-scoped ``fc_id``.
"""
additional_properties = getattr(message, "additional_properties", None)
if not additional_properties:
return False
return "_attribution" in additional_properties
def _prepare_message_for_openai(
self,
message: Message,
*,
request_uses_service_side_storage: bool = True,
) -> list[dict[str, Any]]:
"""Prepare a chat message for the OpenAI Responses API format."""
all_messages: list[dict[str, Any]] = []
@@ -1280,34 +1290,63 @@ class RawOpenAIChatClient( # type: ignore[misc]
"type": "message",
"role": message.role,
}
additional_properties = message.additional_properties
replays_local_storage = "_attribution" in additional_properties
uses_service_side_storage = request_uses_service_side_storage and not replays_local_storage
# Reasoning items are only valid in input when they directly preceded a function_call
# in the same response. Including a reasoning item that preceded a text response
# in the same response. Including a reasoning item that preceded a text response
# (i.e. no function_call in the same message) causes an API error:
# "reasoning was provided without its required following item."
#
# Local storage is stricter: response-scoped reasoning items (rs_*) cannot be replayed
# back to the service unless that message is using service-side storage.
# In that mode we omit reasoning items and rely on function call + tool output replay.
has_function_call = any(c.type == "function_call" for c in message.contents)
for content in message.contents:
match content.type:
case "text_reasoning":
if not has_function_call:
if not uses_service_side_storage or not has_function_call:
continue # reasoning not followed by a function_call is invalid in input
reasoning = self._prepare_content_for_openai(message.role, content, message=message)
reasoning = self._prepare_content_for_openai(
message.role,
content,
replays_local_storage=replays_local_storage,
)
if reasoning:
all_messages.append(reasoning)
case "function_result":
new_args: dict[str, Any] = {}
new_args.update(self._prepare_content_for_openai(message.role, content, message=message))
new_args.update(
self._prepare_content_for_openai(
message.role,
content,
replays_local_storage=replays_local_storage,
)
)
if new_args:
all_messages.append(new_args)
case "function_call":
function_call = self._prepare_content_for_openai(message.role, content, message=message)
function_call = self._prepare_content_for_openai(
message.role,
content,
replays_local_storage=replays_local_storage,
)
if function_call:
all_messages.append(function_call)
case "function_approval_response" | "function_approval_request":
prepared = self._prepare_content_for_openai(message.role, content, message=message)
prepared = self._prepare_content_for_openai(
message.role,
content,
replays_local_storage=replays_local_storage,
)
if prepared:
all_messages.append(prepared)
case _:
prepared_content = self._prepare_content_for_openai(message.role, content, message=message)
prepared_content = self._prepare_content_for_openai(
message.role,
content,
replays_local_storage=replays_local_storage,
)
if prepared_content:
if "content" not in args:
args["content"] = []
@@ -1321,7 +1360,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
role: Role | str,
content: Content,
*,
message: Message | None = None,
replays_local_storage: bool = False,
) -> dict[str, Any]:
"""Prepare content for the OpenAI Responses API format."""
role = Role(role)
@@ -1401,11 +1440,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
logger.warning(f"FunctionCallContent missing call_id for function '{content.name}'")
return {}
fc_id = content.call_id
if (
message is not None
and not self._message_replays_provider_context(message)
and content.additional_properties
):
if not replays_local_storage and content.additional_properties:
live_fc_id = content.additional_properties.get("fc_id")
if isinstance(live_fc_id, str) and live_fc_id:
fc_id = live_fc_id
@@ -1015,6 +1015,84 @@ async def test_shell_call_is_invoked_as_local_shell_function_loop() -> None:
assert len(local_shell_outputs) == 0
async def test_tool_loop_store_false_omits_reasoning_items_from_second_request() -> None:
"""Stateless tool-loop replay must omit response-scoped reasoning items."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
mock_response1 = MagicMock()
mock_response1.output_parsed = None
mock_response1.metadata = {}
mock_response1.usage = None
mock_response1.id = "resp-1"
mock_response1.model = "test-model"
mock_response1.created_at = 1000000000
mock_response1.status = "completed"
mock_response1.finish_reason = "tool_calls"
mock_response1.incomplete = None
mock_response1.conversation = None
mock_reasoning_item = MagicMock()
mock_reasoning_item.type = "reasoning"
mock_reasoning_item.id = "rs_local_only"
mock_reasoning_item.content = []
mock_reasoning_item.summary = []
mock_reasoning_item.encrypted_content = None
mock_function_call_item = MagicMock()
mock_function_call_item.type = "function_call"
mock_function_call_item.id = "fc_tool123"
mock_function_call_item.call_id = "call_123"
mock_function_call_item.name = "get_weather"
mock_function_call_item.arguments = '{"location":"Amsterdam"}'
mock_function_call_item.status = "completed"
mock_response1.output = [mock_reasoning_item, mock_function_call_item]
mock_response2 = MagicMock()
mock_response2.output_parsed = None
mock_response2.metadata = {}
mock_response2.usage = None
mock_response2.id = "resp-2"
mock_response2.model = "test-model"
mock_response2.created_at = 1000000001
mock_response2.status = "completed"
mock_response2.finish_reason = "stop"
mock_response2.incomplete = None
mock_response2.conversation = None
mock_text_item = MagicMock()
mock_text_item.type = "message"
mock_text_content = MagicMock()
mock_text_content.type = "output_text"
mock_text_content.text = "The weather in Amsterdam is sunny."
mock_text_item.content = [mock_text_content]
mock_response2.output = [mock_text_item]
with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
response = await client.get_response(
messages=[Message(role="user", contents=["What's the weather in Amsterdam?"])],
options={
"store": False,
"tools": [get_weather],
"tool_choice": {"mode": "required", "required_function_name": "get_weather"},
},
)
assert response.text == "The weather in Amsterdam is sunny."
assert mock_create.call_count == 2
second_call_input = mock_create.call_args_list[1].kwargs["input"]
assert not any(item.get("type") == "reasoning" for item in second_call_input)
function_calls = [item for item in second_call_input if item.get("type") == "function_call"]
assert len(function_calls) == 1
assert function_calls[0]["id"] == "fc_tool123"
function_outputs = [item for item in second_call_input if item.get("type") == "function_call_output"]
assert len(function_outputs) == 1
assert function_outputs[0]["call_id"] == "call_123"
def test_response_content_creation_with_shell_call() -> None:
"""Test _parse_response_from_openai with shell_call output."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -3221,6 +3299,164 @@ async def test_prepare_options_store_parameter_handling() -> None:
assert "previous_response_id" not in options
async def test_prepare_options_store_false_omits_reasoning_items_for_stateless_replay() -> None:
client = OpenAIChatClient(model="test-model", api_key="test-key")
messages = [
Message(role="user", contents=[Content.from_text(text="search for hotels")]),
Message(
role="assistant",
contents=[
Content.from_text_reasoning(
id="rs_test123",
text="I need to search for hotels",
additional_properties={"status": "completed"},
),
Content.from_function_call(
call_id="call_1",
name="search_hotels",
arguments='{"city": "Paris"}',
additional_properties={"fc_id": "fc_test456"},
),
],
),
Message(
role="tool",
contents=[
Content.from_function_result(
call_id="call_1",
result="Found 3 hotels in Paris",
),
],
),
]
options = await client._prepare_options(messages, ChatOptions(store=False)) # type: ignore[arg-type]
assert not any(item.get("type") == "reasoning" for item in options["input"])
assert any(item.get("type") == "function_call" for item in options["input"])
assert any(item.get("type") == "function_call_output" for item in options["input"])
async def test_prepare_options_with_conversation_id_keeps_reasoning_items() -> None:
client = OpenAIChatClient(model="test-model", api_key="test-key")
messages = [
Message(role="user", contents=[Content.from_text(text="search for hotels")]),
Message(
role="assistant",
contents=[
Content.from_text_reasoning(
id="rs_test123",
text="I need to search for hotels",
additional_properties={"status": "completed"},
),
Content.from_function_call(
call_id="call_1",
name="search_hotels",
arguments='{"city": "Paris"}',
additional_properties={"fc_id": "fc_test456"},
),
],
),
Message(
role="tool",
contents=[
Content.from_function_result(
call_id="call_1",
result="Found 3 hotels in Paris",
),
],
),
]
options = await client._prepare_options(
messages,
ChatOptions(store=False, conversation_id="resp_prev123"), # type: ignore[arg-type]
)
reasoning_items = [item for item in options["input"] if item.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["id"] == "rs_test123"
assert options["previous_response_id"] == "resp_prev123"
async def test_prepare_options_with_conversation_id_omits_reasoning_items_for_attributed_replay() -> None:
client = OpenAIChatClient(model="test-model", api_key="test-key")
messages = [
Message(role="user", contents=[Content.from_text(text="search for hotels")]),
Message(
role="assistant",
contents=[
Content.from_text_reasoning(
id="rs_history123",
text="I need to search history for hotels",
additional_properties={"status": "completed"},
),
Content.from_function_call(
call_id="call_history",
name="search_hotels",
arguments='{"city": "Paris"}',
additional_properties={"fc_id": "fc_history456"},
),
],
additional_properties={"_attribution": {"source_id": "history", "source_type": "InMemoryHistoryProvider"}},
),
Message(
role="tool",
contents=[
Content.from_function_result(
call_id="call_history",
result="Found 3 hotels in Paris",
),
],
),
Message(
role="assistant",
contents=[
Content.from_text_reasoning(
id="rs_live123",
text="I should refine the search for a live follow-up",
additional_properties={"status": "completed"},
),
Content.from_function_call(
call_id="call_live",
name="search_hotels",
arguments='{"city": "London"}',
additional_properties={"fc_id": "fc_live456"},
),
],
),
Message(
role="tool",
contents=[
Content.from_function_result(
call_id="call_live",
result="Found 4 hotels in London",
),
],
),
]
options = await client._prepare_options(
messages,
ChatOptions(store=False, conversation_id="resp_prev123"), # type: ignore[arg-type]
)
reasoning_items = [item for item in options["input"] if item.get("type") == "reasoning"]
assert [item["id"] for item in reasoning_items] == ["rs_live123"]
assert any(
item.get("type") == "function_call" and item.get("call_id") == "call_history" for item in options["input"]
)
assert any(item.get("type") == "function_call" and item.get("call_id") == "call_live" for item in options["input"])
assert any(
item.get("type") == "function_call_output" and item.get("call_id") == "call_history"
for item in options["input"]
)
assert any(
item.get("type") == "function_call_output" and item.get("call_id") == "call_live" for item in options["input"]
)
assert options["previous_response_id"] == "resp_prev123"
def _create_mock_responses_text_response(*, response_id: str) -> MagicMock:
mock_response = MagicMock()
mock_response.id = response_id