From 330d3d7165ebb70cb5a4b8748ea78996f536cbb8 Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Tue, 5 May 2026 05:22:56 +0800
Subject: [PATCH] fix(openai): drop completed continuation_token from shared
 options in tool loop (#5462)

Fixes #5394.

When `background=True` is combined with local function tools,
`FunctionInvocationLayer` calls `_inner_get_response(options=mutable_options)`
repeatedly with the same dict reference across loop iterations. Once the
first poll retrieves a completed background response, `continuation_token`
stays in `mutable_options`, so every subsequent iteration takes the
`continuation_token is not None` branch and `GET`s the same completed
response instead of `POST`ing the tool results. The loop exits after
`max_iterations` with empty text and the model never sees any tool output.

After the retrieve, if the returned `ChatResponse.continuation_token` is
`None` (the background response is no longer in progress), pop
`continuation_token` from the shared options dict in place. `background`
is intentionally left set so that subsequent iterations still create
background responses. The next loop iteration then falls through to the
normal `responses.create`/`parse` path and posts tool results.

The diagnosis and a verified runtime monkeypatch are in the issue; this
is the same fix moved in-tree.

Co-authored-by: Yufeng He <40085740+universeplayer@users.noreply.github.com>
---
 .../openai/agent_framework_openai/_chat_client.py     | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/packages/openai/agent_framework_openai/_chat_client.py b/python/packages/openai/agent_framework_openai/_chat_client.py
index 95ef3c5e96..af7995dc45 100644
--- a/python/packages/openai/agent_framework_openai/_chat_client.py
+++ b/python/packages/openai/agent_framework_openai/_chat_client.py
@@ -667,7 +667,16 @@ class RawOpenAIChatClient(  # type: ignore[misc]
                     response = await client.responses.retrieve(continuation_token["response_id"])
                 except Exception as ex:
                     self._handle_request_error(ex)
-                return self._parse_response_from_openai(response, options=validated_options)
+                chat_response = self._parse_response_from_openai(response, options=validated_options)
+                # Once the background response completes, drop the continuation_token from
+                # the caller's options dict. FunctionInvocationLayer reuses the same dict
+                # across tool-loop iterations, so leaving it in place makes the next iteration
+                # retrieve the same completed response again instead of POSTing tool results
+                # (issue #5394). Keep `background` so subsequent iterations still create
+                # background responses.
+                if chat_response.continuation_token is None and isinstance(options, dict):
+                    options.pop("continuation_token", None)
+                return chat_response
             client, run_options, validated_options = await self._prepare_request(messages, options)
             try:
                 if "text_format" in run_options: