Python: Fix reasoning replay when store=False (#5250)

* fix reasoning content when store=False * Remove accidental worktree entries Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * remove local session sample * removed left over files * Add attribution override regression test Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-16 21:04:09 +08:00 · 2026-04-15 19:30:12 +02:00
parent 485af07b8c
commit 2b251d904f
2 changed files with 301 additions and 30 deletions
@@ -1161,7 +1161,16 @@ class RawOpenAIChatClient(  # type: ignore[misc]
            # First turn: prepend instructions as system message
            messages = prepend_instructions_to_messages(list(messages), instructions, role="system")
        # Continuation turn: instructions already exist in conversation context, skip prepending
-        request_input = self._prepare_messages_for_openai(messages)
+        request_uses_service_side_storage = False
+        for key in ("conversation_id", "previous_response_id", "conversation"):
+            value = options.get(key)
+            if isinstance(value, str) and value:
+                request_uses_service_side_storage = True
+                break
+        request_input = self._prepare_messages_for_openai(
+            messages,
+            request_uses_service_side_storage=request_uses_service_side_storage,
+        )
        if not request_input:
            raise ChatClientInvalidRequestException("Messages are required for chat completions")
        conversation_id = options.get("conversation_id")
@@ -1235,7 +1244,12 @@ class RawOpenAIChatClient(  # type: ignore[misc]
                raise ValueError("model must be a non-empty string")
            options["model"] = self.model

-    def _prepare_messages_for_openai(self, chat_messages: Sequence[Message]) -> list[dict[str, Any]]:
+    def _prepare_messages_for_openai(
+        self,
+        chat_messages: Sequence[Message],
+        *,
+        request_uses_service_side_storage: bool = True,
+    ) -> list[dict[str, Any]]:
        """Prepare the chat messages for a request.

        Allowing customization of the key names for role/author, and optionally overriding the role.
@@ -1248,31 +1262,27 @@ class RawOpenAIChatClient(  # type: ignore[misc]

        Args:
            chat_messages: The chat history to prepare.
+            request_uses_service_side_storage: Whether this request continues a service-managed
+                response/conversation and can safely reference service-scoped response items.

        Returns:
            The prepared chat messages for a request.
        """
-        list_of_list = [self._prepare_message_for_openai(message) for message in chat_messages]
+        list_of_list = [
+            self._prepare_message_for_openai(
+                message,
+                request_uses_service_side_storage=request_uses_service_side_storage,
+            )
+            for message in chat_messages
+        ]
        # Flatten the list of lists into a single list
        return list(chain.from_iterable(list_of_list))

-    @staticmethod
-    def _message_replays_provider_context(message: Message) -> bool:
-        """Return whether the message came from provider-attributed replay context.
-
-        Responses ``fc_id`` values are response-scoped and only valid while replaying
-        the same live tool loop. Once a message comes back through a context provider
-        (for example, loaded session history), that message is historical input and
-        must not reuse the original response-scoped ``fc_id``.
-        """
-        additional_properties = getattr(message, "additional_properties", None)
-        if not additional_properties:
-            return False
-        return "_attribution" in additional_properties
-
    def _prepare_message_for_openai(
        self,
        message: Message,
+        *,
+        request_uses_service_side_storage: bool = True,
    ) -> list[dict[str, Any]]:
        """Prepare a chat message for the OpenAI Responses API format."""
        all_messages: list[dict[str, Any]] = []
@@ -1280,34 +1290,63 @@ class RawOpenAIChatClient(  # type: ignore[misc]
            "type": "message",
            "role": message.role,
        }
+        additional_properties = message.additional_properties
+        replays_local_storage = "_attribution" in additional_properties
+        uses_service_side_storage = request_uses_service_side_storage and not replays_local_storage
        # Reasoning items are only valid in input when they directly preceded a function_call
-        # in the same response.  Including a reasoning item that preceded a text response
+        # in the same response. Including a reasoning item that preceded a text response
        # (i.e. no function_call in the same message) causes an API error:
        # "reasoning was provided without its required following item."
+        #
+        # Local storage is stricter: response-scoped reasoning items (rs_*) cannot be replayed
+        # back to the service unless that message is using service-side storage.
+        # In that mode we omit reasoning items and rely on function call + tool output replay.
        has_function_call = any(c.type == "function_call" for c in message.contents)
        for content in message.contents:
            match content.type:
                case "text_reasoning":
-                    if not has_function_call:
+                    if not uses_service_side_storage or not has_function_call:
                        continue  # reasoning not followed by a function_call is invalid in input
-                    reasoning = self._prepare_content_for_openai(message.role, content, message=message)
+                    reasoning = self._prepare_content_for_openai(
+                        message.role,
+                        content,
+                        replays_local_storage=replays_local_storage,
+                    )
                    if reasoning:
                        all_messages.append(reasoning)
                case "function_result":
                    new_args: dict[str, Any] = {}
-                    new_args.update(self._prepare_content_for_openai(message.role, content, message=message))
+                    new_args.update(
+                        self._prepare_content_for_openai(
+                            message.role,
+                            content,
+                            replays_local_storage=replays_local_storage,
+                        )
+                    )
                    if new_args:
                        all_messages.append(new_args)
                case "function_call":
-                    function_call = self._prepare_content_for_openai(message.role, content, message=message)
+                    function_call = self._prepare_content_for_openai(
+                        message.role,
+                        content,
+                        replays_local_storage=replays_local_storage,
+                    )
                    if function_call:
                        all_messages.append(function_call)
                case "function_approval_response" | "function_approval_request":
-                    prepared = self._prepare_content_for_openai(message.role, content, message=message)
+                    prepared = self._prepare_content_for_openai(
+                        message.role,
+                        content,
+                        replays_local_storage=replays_local_storage,
+                    )
                    if prepared:
                        all_messages.append(prepared)
                case _:
-                    prepared_content = self._prepare_content_for_openai(message.role, content, message=message)
+                    prepared_content = self._prepare_content_for_openai(
+                        message.role,
+                        content,
+                        replays_local_storage=replays_local_storage,
+                    )
                    if prepared_content:
                        if "content" not in args:
                            args["content"] = []
@@ -1321,7 +1360,7 @@ class RawOpenAIChatClient(  # type: ignore[misc]
        role: Role | str,
        content: Content,
        *,
-        message: Message | None = None,
+        replays_local_storage: bool = False,
    ) -> dict[str, Any]:
        """Prepare content for the OpenAI Responses API format."""
        role = Role(role)
@@ -1401,11 +1440,7 @@ class RawOpenAIChatClient(  # type: ignore[misc]
                    logger.warning(f"FunctionCallContent missing call_id for function '{content.name}'")
                    return {}
                fc_id = content.call_id
-                if (
-                    message is not None
-                    and not self._message_replays_provider_context(message)
-                    and content.additional_properties
-                ):
+                if not replays_local_storage and content.additional_properties:
                    live_fc_id = content.additional_properties.get("fc_id")
                    if isinstance(live_fc_id, str) and live_fc_id:
                        fc_id = live_fc_id
@@ -1015,6 +1015,84 @@ async def test_shell_call_is_invoked_as_local_shell_function_loop() -> None:
        assert len(local_shell_outputs) == 0


+async def test_tool_loop_store_false_omits_reasoning_items_from_second_request() -> None:
+    """Stateless tool-loop replay must omit response-scoped reasoning items."""
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+
+    mock_response1 = MagicMock()
+    mock_response1.output_parsed = None
+    mock_response1.metadata = {}
+    mock_response1.usage = None
+    mock_response1.id = "resp-1"
+    mock_response1.model = "test-model"
+    mock_response1.created_at = 1000000000
+    mock_response1.status = "completed"
+    mock_response1.finish_reason = "tool_calls"
+    mock_response1.incomplete = None
+    mock_response1.conversation = None
+
+    mock_reasoning_item = MagicMock()
+    mock_reasoning_item.type = "reasoning"
+    mock_reasoning_item.id = "rs_local_only"
+    mock_reasoning_item.content = []
+    mock_reasoning_item.summary = []
+    mock_reasoning_item.encrypted_content = None
+
+    mock_function_call_item = MagicMock()
+    mock_function_call_item.type = "function_call"
+    mock_function_call_item.id = "fc_tool123"
+    mock_function_call_item.call_id = "call_123"
+    mock_function_call_item.name = "get_weather"
+    mock_function_call_item.arguments = '{"location":"Amsterdam"}'
+    mock_function_call_item.status = "completed"
+
+    mock_response1.output = [mock_reasoning_item, mock_function_call_item]
+
+    mock_response2 = MagicMock()
+    mock_response2.output_parsed = None
+    mock_response2.metadata = {}
+    mock_response2.usage = None
+    mock_response2.id = "resp-2"
+    mock_response2.model = "test-model"
+    mock_response2.created_at = 1000000001
+    mock_response2.status = "completed"
+    mock_response2.finish_reason = "stop"
+    mock_response2.incomplete = None
+    mock_response2.conversation = None
+
+    mock_text_item = MagicMock()
+    mock_text_item.type = "message"
+    mock_text_content = MagicMock()
+    mock_text_content.type = "output_text"
+    mock_text_content.text = "The weather in Amsterdam is sunny."
+    mock_text_item.content = [mock_text_content]
+    mock_response2.output = [mock_text_item]
+
+    with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
+        response = await client.get_response(
+            messages=[Message(role="user", contents=["What's the weather in Amsterdam?"])],
+            options={
+                "store": False,
+                "tools": [get_weather],
+                "tool_choice": {"mode": "required", "required_function_name": "get_weather"},
+            },
+        )
+
+    assert response.text == "The weather in Amsterdam is sunny."
+    assert mock_create.call_count == 2
+
+    second_call_input = mock_create.call_args_list[1].kwargs["input"]
+    assert not any(item.get("type") == "reasoning" for item in second_call_input)
+
+    function_calls = [item for item in second_call_input if item.get("type") == "function_call"]
+    assert len(function_calls) == 1
+    assert function_calls[0]["id"] == "fc_tool123"
+
+    function_outputs = [item for item in second_call_input if item.get("type") == "function_call_output"]
+    assert len(function_outputs) == 1
+    assert function_outputs[0]["call_id"] == "call_123"
+
+
 def test_response_content_creation_with_shell_call() -> None:
    """Test _parse_response_from_openai with shell_call output."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -3221,6 +3299,164 @@ async def test_prepare_options_store_parameter_handling() -> None:
    assert "previous_response_id" not in options


+async def test_prepare_options_store_false_omits_reasoning_items_for_stateless_replay() -> None:
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+    messages = [
+        Message(role="user", contents=[Content.from_text(text="search for hotels")]),
+        Message(
+            role="assistant",
+            contents=[
+                Content.from_text_reasoning(
+                    id="rs_test123",
+                    text="I need to search for hotels",
+                    additional_properties={"status": "completed"},
+                ),
+                Content.from_function_call(
+                    call_id="call_1",
+                    name="search_hotels",
+                    arguments='{"city": "Paris"}',
+                    additional_properties={"fc_id": "fc_test456"},
+                ),
+            ],
+        ),
+        Message(
+            role="tool",
+            contents=[
+                Content.from_function_result(
+                    call_id="call_1",
+                    result="Found 3 hotels in Paris",
+                ),
+            ],
+        ),
+    ]
+
+    options = await client._prepare_options(messages, ChatOptions(store=False))  # type: ignore[arg-type]
+
+    assert not any(item.get("type") == "reasoning" for item in options["input"])
+    assert any(item.get("type") == "function_call" for item in options["input"])
+    assert any(item.get("type") == "function_call_output" for item in options["input"])
+
+
+async def test_prepare_options_with_conversation_id_keeps_reasoning_items() -> None:
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+    messages = [
+        Message(role="user", contents=[Content.from_text(text="search for hotels")]),
+        Message(
+            role="assistant",
+            contents=[
+                Content.from_text_reasoning(
+                    id="rs_test123",
+                    text="I need to search for hotels",
+                    additional_properties={"status": "completed"},
+                ),
+                Content.from_function_call(
+                    call_id="call_1",
+                    name="search_hotels",
+                    arguments='{"city": "Paris"}',
+                    additional_properties={"fc_id": "fc_test456"},
+                ),
+            ],
+        ),
+        Message(
+            role="tool",
+            contents=[
+                Content.from_function_result(
+                    call_id="call_1",
+                    result="Found 3 hotels in Paris",
+                ),
+            ],
+        ),
+    ]
+
+    options = await client._prepare_options(
+        messages,
+        ChatOptions(store=False, conversation_id="resp_prev123"),  # type: ignore[arg-type]
+    )
+
+    reasoning_items = [item for item in options["input"] if item.get("type") == "reasoning"]
+    assert len(reasoning_items) == 1
+    assert reasoning_items[0]["id"] == "rs_test123"
+    assert options["previous_response_id"] == "resp_prev123"
+
+
+async def test_prepare_options_with_conversation_id_omits_reasoning_items_for_attributed_replay() -> None:
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+    messages = [
+        Message(role="user", contents=[Content.from_text(text="search for hotels")]),
+        Message(
+            role="assistant",
+            contents=[
+                Content.from_text_reasoning(
+                    id="rs_history123",
+                    text="I need to search history for hotels",
+                    additional_properties={"status": "completed"},
+                ),
+                Content.from_function_call(
+                    call_id="call_history",
+                    name="search_hotels",
+                    arguments='{"city": "Paris"}',
+                    additional_properties={"fc_id": "fc_history456"},
+                ),
+            ],
+            additional_properties={"_attribution": {"source_id": "history", "source_type": "InMemoryHistoryProvider"}},
+        ),
+        Message(
+            role="tool",
+            contents=[
+                Content.from_function_result(
+                    call_id="call_history",
+                    result="Found 3 hotels in Paris",
+                ),
+            ],
+        ),
+        Message(
+            role="assistant",
+            contents=[
+                Content.from_text_reasoning(
+                    id="rs_live123",
+                    text="I should refine the search for a live follow-up",
+                    additional_properties={"status": "completed"},
+                ),
+                Content.from_function_call(
+                    call_id="call_live",
+                    name="search_hotels",
+                    arguments='{"city": "London"}',
+                    additional_properties={"fc_id": "fc_live456"},
+                ),
+            ],
+        ),
+        Message(
+            role="tool",
+            contents=[
+                Content.from_function_result(
+                    call_id="call_live",
+                    result="Found 4 hotels in London",
+                ),
+            ],
+        ),
+    ]
+
+    options = await client._prepare_options(
+        messages,
+        ChatOptions(store=False, conversation_id="resp_prev123"),  # type: ignore[arg-type]
+    )
+
+    reasoning_items = [item for item in options["input"] if item.get("type") == "reasoning"]
+    assert [item["id"] for item in reasoning_items] == ["rs_live123"]
+    assert any(
+        item.get("type") == "function_call" and item.get("call_id") == "call_history" for item in options["input"]
+    )
+    assert any(item.get("type") == "function_call" and item.get("call_id") == "call_live" for item in options["input"])
+    assert any(
+        item.get("type") == "function_call_output" and item.get("call_id") == "call_history"
+        for item in options["input"]
+    )
+    assert any(
+        item.get("type") == "function_call_output" and item.get("call_id") == "call_live" for item in options["input"]
+    )
+    assert options["previous_response_id"] == "resp_prev123"
+
+
 def _create_mock_responses_text_response(*, response_id: str) -> MagicMock:
    mock_response = MagicMock()
    mock_response.id = response_id