From 953ed7560d5273a1567a842a021d5881fe36049c Mon Sep 17 00:00:00 2001
From: peterychang <49209570+peterychang@users.noreply.github.com>
Date: Mon, 18 Aug 2025 20:23:01 -0400
Subject: [PATCH] Fix chat options for Responses API (#441)

---
 .../openai/_responses_client.py               | 82 ++++++++++++-------
 1 file changed, 54 insertions(+), 28 deletions(-)

diff --git a/python/packages/main/agent_framework/openai/_responses_client.py b/python/packages/main/agent_framework/openai/_responses_client.py
index af17d513e6..b7111f98fa 100644
--- a/python/packages/main/agent_framework/openai/_responses_client.py
+++ b/python/packages/main/agent_framework/openai/_responses_client.py
@@ -84,13 +84,19 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
 
     FILE_SEARCH_MAX_RESULTS: int = 50
 
+    def _filter_options(self, **kwargs: Any) -> dict[str, Any]:
+        """Filter options for the responses call."""
+        # Drop every option whose value is None, so that only explicitly set options
+        # are merged into additional_properties by get_response / get_streaming_response.
+        return {key: value for key, value in kwargs.items() if value is not None}
+
     @override
     async def get_response(
         self,
         messages: str | ChatMessage | list[str] | list[ChatMessage],
         *,
         include: list["ResponseIncludable"] | None = None,
-        instruction: str | None = None,
+        instructions: str | None = None,
         max_tokens: int | None = None,
         parallel_tool_calls: bool | None = None,
         model: str | None = None,
@@ -121,7 +127,7 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
         Args:
             messages: the message or messages to send to the model
             include: additional output data to include in the model response.
-            instruction: a system (or developer) message inserted into the model's context.
+            instructions: a system (or developer) message inserted into the model's context.
             max_tokens: The maximum number of tokens to generate.
             parallel_tool_calls: Whether to enable parallel tool calls.
             model: The model to use for the agent.
@@ -145,27 +151,37 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
         Returns:
             A chat response from the model.
         """
-        return await super().get_response(
-            messages=messages,
-            include=include,
-            instruction=instruction,
+        additional_properties = additional_properties or {}
+        additional_properties.update(
+            self._filter_options(
+                include=include,
+                instructions=instructions,
+                parallel_tool_calls=parallel_tool_calls,
+                model=model,
+                previous_response_id=previous_response_id,
+                reasoning=reasoning,
+                service_tier=service_tier,
+                truncation=truncation,
+                timeout=timeout,
+            )
+        )
+
+        chat_options = ChatOptions(
             max_tokens=max_tokens,
-            parallel_tool_calls=parallel_tool_calls,
-            model=model,
-            previous_response_id=previous_response_id,
-            reasoning=reasoning,
-            service_tier=service_tier,
             response_format=response_format,
             seed=seed,
             store=store,
             temperature=temperature,
             tool_choice=tool_choice,
-            tools=tools,
+            tools=tools,  # type: ignore
             top_p=top_p,
             user=user,
-            truncation=truncation,
-            timeout=timeout,
             additional_properties=additional_properties,
+        )
+
+        return await super().get_response(
+            messages=messages,
+            chat_options=chat_options,
             **kwargs,
         )
 
@@ -176,7 +192,7 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
         *,
         # TODO(peterychang): enable this option. background: bool | None = None,
         include: list["ResponseIncludable"] | None = None,
-        instruction: str | None = None,
+        instructions: str | None = None,
         max_tokens: int | None = None,
         parallel_tool_calls: bool | None = None,
         model: str | None = None,
@@ -207,7 +223,7 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
         Args:
             messages: the message or messages to send to the model
             include: additional output data to include in the model response.
-            instruction: a system (or developer) message inserted into the model's context.
+            instructions: a system (or developer) message inserted into the model's context.
             max_tokens: The maximum number of tokens to generate.
             parallel_tool_calls: Whether to enable parallel tool calls.
             model: The model to use for the agent.
@@ -231,27 +247,37 @@ class OpenAIResponsesClientBase(OpenAIHandler, ChatClientBase):
         Returns:
             A stream representing the response(s) from the LLM.
         """
-        async for update in super().get_streaming_response(
-            messages=messages,
-            include=include,
-            instruction=instruction,
+        additional_properties = additional_properties or {}
+        additional_properties.update(
+            self._filter_options(
+                include=include,
+                instructions=instructions,
+                parallel_tool_calls=parallel_tool_calls,
+                model=model,
+                previous_response_id=previous_response_id,
+                reasoning=reasoning,
+                service_tier=service_tier,
+                truncation=truncation,
+                timeout=timeout,
+            )
+        )
+
+        chat_options = ChatOptions(
             max_tokens=max_tokens,
-            parallel_tool_calls=parallel_tool_calls,
-            model=model,
-            previous_response_id=previous_response_id,
-            reasoning=reasoning,
-            service_tier=service_tier,
             response_format=response_format,
             seed=seed,
             store=store,
             temperature=temperature,
             tool_choice=tool_choice,
-            tools=tools,
+            tools=tools,  # type: ignore
             top_p=top_p,
             user=user,
-            truncation=truncation,
-            timeout=timeout,
             additional_properties=additional_properties,
+        )
+
+        async for update in super().get_streaming_response(
+            messages=messages,
+            chat_options=chat_options,
             **kwargs,
         ):
             yield update