Python: updated declarative samples and handling of non-pydantic response formats (#5022)

* updated declarative samples and handling of non-Pydantic response formats
* applied fixes from review comments
* updated docstring
2026-06-16 21:04:09 +08:00 · 2026-04-01 21:16:00 +02:00
parent 6acab3d1d6
commit 519bb0cb2b
21 changed files with 370 additions and 90 deletions
@@ -3,13 +3,13 @@ name: MicrosoftLearnAgent
 description: Microsoft Learn Agent
 instructions: You answer questions by searching the Microsoft Learn content only.
 model:
-    id: =Env.AZURE_FOUNDRY_PROJECT_MODEL_ID
+    id: =Env.FOUNDRY_MODEL
    options:
        temperature: 0.9
        topP: 0.95
    connection:
        kind: remote
-        endpoint: =Env.AZURE_FOUNDRY_PROJECT_ENDPOINT
+        endpoint: =Env.FOUNDRY_PROJECT_ENDPOINT
 tools:
  - kind: mcp
    name: microsoft_learn
@@ -992,6 +992,27 @@ def test_process_message_basic(mock_anthropic_client: MagicMock) -> None:
    assert response.usage_details["output_token_count"] == 5


+def test_process_message_with_dict_response_format(mock_anthropic_client: MagicMock) -> None:
+    """_process_message should preserve dict response_format values for response.value parsing."""
+    client = create_test_anthropic_client(mock_anthropic_client)
+
+    # Mocked message whose single text block holds a JSON object serialized as a string.
+    mock_message = MagicMock(spec=BetaMessage)
+    mock_message.id = "msg_123"
+    mock_message.model = "claude-3-5-sonnet-20241022"
+    mock_message.content = [BetaTextBlock(type="text", text='{"greeting": "Hello"}')]
+    mock_message.usage = BetaUsage(input_tokens=10, output_tokens=5)
+    mock_message.stop_reason = "end_turn"
+
+    # response_format is a plain schema dict rather than a Pydantic model type.
+    response = client._process_message(
+        mock_message,
+        options={"response_format": {"type": "object", "properties": {"greeting": {"type": "string"}}}},
+    )
+
+    # The value is expected to be the decoded JSON object, exposed as a dict.
+    assert response.value is not None
+    assert isinstance(response.value, dict)
+    assert response.value["greeting"] == "Hello"
+
+
 def test_process_message_with_tool_use(mock_anthropic_client: MagicMock) -> None:
    """Test _process_message with tool use."""
    client = create_test_anthropic_client(mock_anthropic_client)
@@ -1026,20 +1026,13 @@ class RawAgent(BaseAgent, Generic[OptionsCoT]):  # type: ignore[misc]
            session_context=context["session_context"],
            suppress_response_id=context["suppress_response_id"],
        )
-
-        response_format = context["chat_options"].get("response_format")
-        if not (
-            response_format is not None and isinstance(response_format, type) and issubclass(response_format, BaseModel)
-        ):
-            response_format = None
-
        return AgentResponse(
            messages=response.messages,
            response_id=None if context["suppress_response_id"] else response.response_id,
            created_at=response.created_at,
            usage_details=response.usage_details,
            value=response.value,
-            response_format=response_format,
+            response_format=context["chat_options"].get("response_format"),
            continuation_token=response.continuation_token,
            raw_representation=response,
            additional_properties=response.additional_properties,
@@ -1125,10 +1118,9 @@ class RawAgent(BaseAgent, Generic[OptionsCoT]):  # type: ignore[misc]
        response_format: Any | None = None,
    ) -> AgentResponse[Any]:
        """Finalize response updates into a single AgentResponse."""
-        output_format_type = response_format if isinstance(response_format, type) else None
        return AgentResponse.from_updates(  # pyright: ignore[reportUnknownVariableType]
            updates,
-            output_format_type=output_format_type,
+            output_format_type=response_format,
        )

    @staticmethod
@@ -345,10 +345,9 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
        response_format: Any | None = None,
    ) -> ChatResponse[Any]:
        """Finalize response updates into a single ChatResponse."""
-        output_format_type = response_format if isinstance(response_format, type) else None
        return ChatResponse.from_updates(  # pyright: ignore[reportUnknownVariableType]
            updates,
-            output_format_type=output_format_type,
+            output_format_type=response_format,
        )

    def _build_response_stream(
@@ -2327,7 +2327,6 @@ class FunctionInvocationLayer(Generic[OptionsCoT]):
            return _get_response()

        response_format = mutable_options.get("response_format") if mutable_options else None
-        output_format_type: type[BaseModel] | None = response_format if isinstance(response_format, type) else None
        stream_result_hooks: list[Callable[[ChatResponse], Any]] = []

        async def _stream() -> AsyncIterable[ChatResponseUpdate]:
@@ -2485,6 +2484,6 @@ class FunctionInvocationLayer(Generic[OptionsCoT]):
        def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse[Any]:
            # Note: stream_result_hooks are already run via inner stream's get_final_response()
            # We don't need to run them again here
-            return ChatResponse.from_updates(updates, output_format_type=output_format_type)
+            return ChatResponse.from_updates(updates, output_format_type=response_format)

        return ResponseStream(_stream(), finalizer=_finalize)
@@ -299,6 +299,7 @@ ToolModeT = TypeVar("ToolModeT", bound="ToolMode")
 AgentResponseT = TypeVar("AgentResponseT", bound="AgentResponse")
 ResponseModelT = TypeVar("ResponseModelT", bound=BaseModel | None, default=None, covariant=True)
 ResponseModelBoundT = TypeVar("ResponseModelBoundT", bound=BaseModel)
+StructuredResponseFormat = type[BaseModel] | Mapping[str, Any] | None

 CreatedAtT = str  # Use a datetimeoffset type? Or a more specific type like datetime.datetime?

@@ -1949,6 +1950,24 @@ class ContinuationToken(TypedDict):
 # endregion


+def _parse_structured_response_value(text: str, response_format: Any | None) -> Any | None:
+    """Parse response text into a structured value according to ``response_format``.
+
+    Args:
+        text: The response text to parse.
+        response_format: A Pydantic model type, a mapping (such as a JSON schema dict), or None.
+
+    Returns:
+        None when ``response_format`` is None; the result of ``model_validate_json`` when it is a
+        ``BaseModel`` subclass; the result of ``json.loads(text)`` when it is a mapping; and None
+        (after logging a warning) for any other kind of ``response_format``.
+
+    Raises:
+        ValueError: If ``response_format`` is a mapping and ``text`` is not valid JSON.
+
+    Any exception raised by ``model_validate_json`` is propagated unchanged.
+    """
+    if response_format is None:
+        return None
+    # Pydantic model type: validate and build the model directly from the JSON text.
+    if isinstance(response_format, type) and issubclass(response_format, BaseModel):
+        return response_format.model_validate_json(text)
+    # Mapping: the text is only decoded as JSON here; it is not validated against the mapping.
+    if isinstance(response_format, Mapping):
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"Response text is not valid JSON: {exc}") from exc
+    # Any other type is unsupported: warn and report no structured value instead of raising.
+    logger.warning(
+        "Unable to parse structured response value, use either a Pydantic model or a dict defining the schema, "
+        "received response_format type: %s",
+        type(response_format),  # type: ignore[reportUnknownArgumentType]
+    )
+    return None
+
+
 class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
    """Represents the response to a chat request.

@@ -2014,7 +2033,7 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        finish_reason: FinishReasonLiteral | FinishReason | None = None,
        usage_details: UsageDetails | None = None,
        value: ResponseModelT | None = None,
-        response_format: type[BaseModel] | None = None,
+        response_format: StructuredResponseFormat = None,
        continuation_token: ContinuationToken | None = None,
        additional_properties: dict[str, Any] | None = None,
        raw_representation: Any | None = None,
@@ -2058,7 +2077,7 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        self.finish_reason = finish_reason
        self.usage_details = usage_details
        self._value: ResponseModelT | None = value
-        self._response_format: type[BaseModel] | None = response_format
+        self._response_format: StructuredResponseFormat = response_format
        self._value_parsed: bool = value is not None
        self.additional_properties = (
            _restore_compaction_annotation_in_additional_properties(additional_properties) or {}
@@ -2087,6 +2106,15 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        output_format_type: type[ResponseModelBoundT],
    ) -> ChatResponse[ResponseModelBoundT]: ...

+    @overload
+    @classmethod
+    def from_updates(
+        cls: type[ChatResponse[Any]],
+        updates: Sequence[ChatResponseUpdate],
+        *,
+        output_format_type: Mapping[str, Any],
+    ) -> ChatResponse[Any]: ...
+
    @overload
    @classmethod
    def from_updates(
@@ -2101,7 +2129,7 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        cls: type[ChatResponseT],
        updates: Sequence[ChatResponseUpdate],
        *,
-        output_format_type: type[BaseModel] | None = None,
+        output_format_type: StructuredResponseFormat = None,
    ) -> ChatResponseT:
        """Joins multiple updates into a single ChatResponse.

@@ -2124,10 +2152,10 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
            updates: A sequence of ChatResponseUpdate objects to combine.

        Keyword Args:
-            output_format_type: Optional Pydantic model type to parse the response text into structured data.
+            output_format_type: Optional Pydantic model type or JSON schema mapping used to parse the
+                response text into structured data.
        """
-        response_format = output_format_type if isinstance(output_format_type, type) else None
-        msg = cls(messages=[], response_format=response_format)
+        msg = cls(messages=[], response_format=output_format_type)
        for update in updates:
            _process_update(msg, update)
        _finalize_response(msg)
@@ -2142,6 +2170,15 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        output_format_type: type[ResponseModelBoundT],
    ) -> ChatResponse[ResponseModelBoundT]: ...

+    @overload
+    @classmethod
+    async def from_update_generator(
+        cls: type[ChatResponse[Any]],
+        updates: AsyncIterable[ChatResponseUpdate],
+        *,
+        output_format_type: Mapping[str, Any],
+    ) -> ChatResponse[Any]: ...
+
    @overload
    @classmethod
    async def from_update_generator(
@@ -2156,7 +2193,7 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
        cls: type[ChatResponseT],
        updates: AsyncIterable[ChatResponseUpdate],
        *,
-        output_format_type: type[BaseModel] | None = None,
+        output_format_type: StructuredResponseFormat = None,
    ) -> ChatResponseT:
        """Joins multiple updates into a single ChatResponse.

@@ -2175,10 +2212,10 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):
            updates: An async iterable of ChatResponseUpdate objects to combine.

        Keyword Args:
-            output_format_type: Optional Pydantic model type to parse the response text into structured data.
+            output_format_type: Optional Pydantic model type or JSON schema mapping used to parse the
+                response text into structured data.
        """
-        response_format = output_format_type if isinstance(output_format_type, type) else None
-        msg = cls(messages=[], response_format=response_format)
+        msg = cls(messages=[], response_format=output_format_type)
        async for update in updates:
            _process_update(msg, update)
        _finalize_response(msg)
@@ -2198,15 +2235,12 @@ class ChatResponse(SerializationMixin, Generic[ResponseModelT]):

        Raises:
            ValidationError: If the response text doesn't match the expected schema.
+            ValueError: If the response text is not valid JSON for a non-Pydantic structured format.
        """
        if self._value_parsed:
            return self._value
-        if (
-            self._response_format is not None
-            and isinstance(self._response_format, type)
-            and issubclass(self._response_format, BaseModel)
-        ):
-            self._value = cast(ResponseModelT, self._response_format.model_validate_json(self.text))
+        if self._response_format is not None:
+            self._value = cast(ResponseModelT, _parse_structured_response_value(self.text, self._response_format))
            self._value_parsed = True
        return self._value

@@ -2397,7 +2431,7 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        created_at: CreatedAtT | None = None,
        usage_details: UsageDetails | None = None,
        value: ResponseModelT | None = None,
-        response_format: type[BaseModel] | None = None,
+        response_format: StructuredResponseFormat = None,
        continuation_token: ContinuationToken | None = None,
        raw_representation: Any | None = None,
        additional_properties: dict[str, Any] | None = None,
@@ -2438,7 +2472,7 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        self.created_at = created_at
        self.usage_details = usage_details
        self._value: ResponseModelT | None = value
-        self._response_format: type[BaseModel] | None = response_format
+        self._response_format: type[BaseModel] | Mapping[str, Any] | None = response_format
        self._value_parsed: bool = value is not None
        self.additional_properties = (
            _restore_compaction_annotation_in_additional_properties(additional_properties) or {}
@@ -2460,15 +2494,12 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):

        Raises:
            ValidationError: If the response text doesn't match the expected schema.
+            ValueError: If the response text is not valid JSON for a non-Pydantic structured format.
        """
        if self._value_parsed:
            return self._value
-        if (
-            self._response_format is not None
-            and isinstance(self._response_format, type)
-            and issubclass(self._response_format, BaseModel)
-        ):
-            self._value = cast(ResponseModelT, self._response_format.model_validate_json(self.text))
+        if self._response_format is not None:
+            self._value = cast(ResponseModelT, _parse_structured_response_value(self.text, self._response_format))
            self._value_parsed = True
        return self._value

@@ -2492,6 +2523,16 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        value: Any | None = None,
    ) -> AgentResponse[ResponseModelBoundT]: ...

+    @overload
+    @classmethod
+    def from_updates(
+        cls: type[AgentResponse[Any]],
+        updates: Sequence[AgentResponseUpdate],
+        *,
+        output_format_type: Mapping[str, Any],
+        value: Any | None = None,
+    ) -> AgentResponse[Any]: ...
+
    @overload
    @classmethod
    def from_updates(
@@ -2507,7 +2548,7 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        cls: type[AgentResponseT],
        updates: Sequence[AgentResponseUpdate],
        *,
-        output_format_type: type[BaseModel] | None = None,
+        output_format_type: StructuredResponseFormat = None,
        value: Any | None = None,
    ) -> AgentResponseT:
        """Joins multiple updates into a single AgentResponse.
@@ -2516,7 +2557,8 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
            updates: A sequence of AgentResponseUpdate objects to combine.

        Keyword Args:
-            output_format_type: Optional Pydantic model type to parse the response text into structured data.
+            output_format_type: Optional Pydantic model type or JSON schema mapping used to parse the
+                response text into structured data.
            value: Optional pre-parsed structured output value to set directly on the response.
        """
        msg = cls(messages=[], response_format=output_format_type, value=value)
@@ -2534,6 +2576,15 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        output_format_type: type[ResponseModelBoundT],
    ) -> AgentResponse[ResponseModelBoundT]: ...

+    @overload
+    @classmethod
+    async def from_update_generator(
+        cls: type[AgentResponse[Any]],
+        updates: AsyncIterable[AgentResponseUpdate],
+        *,
+        output_format_type: Mapping[str, Any],
+    ) -> AgentResponse[Any]: ...
+
    @overload
    @classmethod
    async def from_update_generator(
@@ -2548,7 +2599,7 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
        cls: type[AgentResponseT],
        updates: AsyncIterable[AgentResponseUpdate],
        *,
-        output_format_type: type[BaseModel] | None = None,
+        output_format_type: StructuredResponseFormat = None,
    ) -> AgentResponseT:
        """Joins multiple updates into a single AgentResponse.

@@ -2556,7 +2607,8 @@ class AgentResponse(SerializationMixin, Generic[ResponseModelT]):
            updates: An async iterable of AgentResponseUpdate objects to combine.

        Keyword Args:
-            output_format_type: Optional Pydantic model type to parse the response text into structured data
+            output_format_type: Optional Pydantic model type or JSON schema mapping used to parse the
+                response text into structured data.
        """
        msg = cls(messages=[], response_format=output_format_type)
        async for update in updates:
@@ -127,9 +127,7 @@ class MockChatClient:
                yield ChatResponseUpdate(contents=[Content.from_text("another update")], role="assistant")

        def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse:
-            response_format = options.get("response_format")
-            output_format_type = response_format if isinstance(response_format, type) else None
-            return ChatResponse.from_updates(updates, output_format_type=output_format_type)
+            return ChatResponse.from_updates(updates, output_format_type=options.get("response_format"))

        return ResponseStream(_stream(), finalizer=_finalize)

@@ -233,9 +231,7 @@ class MockBaseChatClient(
            await asyncio.sleep(0)

        def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse:
-            response_format = options.get("response_format")
-            output_format_type = response_format if isinstance(response_format, type) else None
-            return ChatResponse.from_updates(updates, output_format_type=output_format_type)
+            return ChatResponse.from_updates(updates, output_format_type=options.get("response_format"))

        return ResponseStream(_stream(), finalizer=_finalize)

@@ -301,6 +301,56 @@ async def test_chat_client_agent_streaming_response_format_from_run_options(
    assert result.value.greeting == "Hi"


+async def test_chat_client_agent_response_format_dict_from_default_options(
+    client: SupportsChatGetResponse,
+) -> None:
+    """AgentResponse.value should parse JSON dicts from default_options response_format."""
+    json_text = json.dumps({"greeting": "Hello"})
+    # Append a canned assistant response carrying the JSON text to the client fixture.
+    client.responses.append(ChatResponse(messages=Message(role="assistant", text=json_text)))  # type: ignore[attr-defined]
+
+    # The schema dict is supplied once at agent construction, not per run.
+    agent = Agent(
+        client=client,
+        default_options={"response_format": {"type": "object", "properties": {"greeting": {"type": "string"}}}},
+    )
+    result = await agent.run("Hello")
+
+    # The raw text is unchanged and the value is the decoded dict.
+    assert result.text == json_text
+    assert result.value is not None
+    assert isinstance(result.value, dict)
+    assert result.value["greeting"] == "Hello"
+
+
+async def test_chat_client_agent_streaming_response_format_dict_from_run_options(
+    client: SupportsChatGetResponse,
+) -> None:
+    """Agent streaming should preserve mapping response_format and parse the final value as a dict."""
+    json_text = json.dumps({"greeting": "Hi"})
+    # Append one streaming sequence with a single update that carries the whole JSON text.
+    client.streaming_responses.append(  # type: ignore[attr-defined]
+        [
+            ChatResponseUpdate(
+                contents=[Content.from_text(json_text)],
+                role="assistant",
+                finish_reason="stop",
+            )
+        ]
+    )
+
+    # Here the schema dict is passed per run via options rather than as an agent default.
+    agent = Agent(client=client)
+    stream = agent.run(
+        "Hello",
+        stream=True,
+        options={"response_format": {"type": "object", "properties": {"greeting": {"type": "string"}}}},
+    )
+    # Drain the stream before requesting the final response.
+    async for _ in stream:
+        pass
+    result = await stream.get_final_response()
+
+    assert result.text == json_text
+    assert result.value is not None
+    assert isinstance(result.value, dict)
+    assert result.value["greeting"] == "Hi"
+
+
 async def test_chat_client_agent_create_session(
    client: SupportsChatGetResponse,
 ) -> None:
@@ -191,9 +191,7 @@ def mock_chat_client():
                yield ChatResponseUpdate(contents=[Content.from_text(" world")], role="assistant", finish_reason="stop")

            def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse:
-                response_format = options.get("response_format")
-                output_format_type = response_format if isinstance(response_format, type) else None
-                return ChatResponse.from_updates(updates, output_format_type=output_format_type)
+                return ChatResponse.from_updates(updates, output_format_type=options.get("response_format"))

            return ResponseStream(_stream(), finalizer=_finalize)

@@ -800,6 +800,19 @@ def test_chat_response_with_format_init():
    assert response.value.response == "Hello"


+def test_chat_response_with_mapping_response_format() -> None:
+    """ChatResponse.value should parse JSON when response_format is a mapping."""
+    message = Message(role="assistant", text='{"response": "Hello"}')
+    # response_format is given directly to the constructor as a schema dict.
+    response = ChatResponse(
+        messages=message,
+        response_format={"type": "object", "properties": {"response": {"type": "string"}}},
+    )
+
+    # Accessing .value should yield the decoded JSON object as a dict.
+    assert response.value is not None
+    assert isinstance(response.value, dict)
+    assert response.value["response"] == "Hello"
+
+
 def test_chat_response_value_raises_on_invalid_schema():
    """Test that value property raises ValidationError with field constraint details."""

@@ -1004,6 +1017,22 @@ async def test_chat_response_from_async_generator_output_format_in_method():
    assert resp.value.response == "Hello"


+async def test_chat_response_from_async_generator_mapping_response_format() -> None:
+    """from_update_generator should accept a mapping output_format_type and parse value as a dict."""
+
+    async def gen() -> AsyncIterable[ChatResponseUpdate]:
+        # The JSON document is split across two updates that share one message_id.
+        yield ChatResponseUpdate(contents=[Content.from_text('{ "respon')], message_id="1")
+        yield ChatResponseUpdate(contents=[Content.from_text('se": "Hello" }')], message_id="1")
+
+    resp = await ChatResponse.from_update_generator(
+        gen(),
+        output_format_type={"type": "object", "properties": {"response": {"type": "string"}}},
+    )
+
+    # The two fragments are expected to join into the full JSON text, which then decodes to a dict.
+    assert resp.text == '{ "response": "Hello" }'
+    assert resp.value is not None
+    assert isinstance(resp.value, dict)
+    assert resp.value["response"] == "Hello"
+
+
 # region ToolMode


@@ -82,7 +82,7 @@ PROVIDER_TYPE_OBJECT_MAPPING: dict[str, ProviderTypeMapping] = {
    },
    "OpenAI.Chat": {
        "package": "agent_framework.openai",
-        "name": "OpenAIChatClient",
+        "name": "OpenAIChatCompletionClient",
        "model_field": "model",
        "endpoint_field": "base_url",
        "api_key_field": "api_key",
@@ -186,7 +186,7 @@ class AgentFactory:
        connections: Mapping[str, Any] | None = None,
        client_kwargs: Mapping[str, Any] | None = None,
        additional_mappings: Mapping[str, ProviderTypeMapping] | None = None,
-        default_provider: str = "OpenAI",
+        default_provider: str = "Foundry",
        safe_mode: bool = True,
        env_file_path: str | None = None,
        env_file_encoding: str | None = None,
@@ -223,7 +223,7 @@ class AgentFactory:
                    SupportsChatGetResponse implementation, and model_field is the name of the field in the
                    constructor that accepts the model.id value.
            default_provider: The default provider used when model.provider is not specified,
-                default is "OpenAI".
+                default is "Foundry", which uses the FoundryChatClient.
            safe_mode: Whether to run in safe mode, default is True.
                When safe_mode is True, environment variables are not accessible in the powerfx expressions.
                You can still use environment variables, but through the constructors of the classes.
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import inspect
-import json
 import os
 import sys
 from functools import wraps
@@ -532,6 +531,48 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
    assert response.model == "test-model"


+async def test_response_format_dict_parse_path() -> None:
+    """get_response with a dict response_format should expose the JSON output text as a dict value."""
+    mock_openai_client = _make_mock_openai_client()
+    project_client = MagicMock()
+    project_client.get_openai_client.return_value = mock_openai_client
+    client = FoundryChatClient(project_client=project_client, model="test-model")
+    # Runtime schema dict instead of a Pydantic model type.
+    response_format = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    mock_response = MagicMock()
+    mock_response.id = "response_123"
+    mock_response.model = "test-model"
+    mock_response.created_at = 1000000000
+    mock_response.metadata = {}
+    # NOTE(review): output_parsed is None, presumably so the value must come from the output text; confirm.
+    mock_response.output_parsed = None
+    mock_response.output = []
+    mock_response.usage = None
+    mock_response.finish_reason = None
+    mock_response.conversation = None
+    mock_response.status = "completed"
+
+    # A single output_text content item whose text is a JSON object.
+    mock_message_content = MagicMock()
+    mock_message_content.type = "output_text"
+    mock_message_content.text = '{"answer": "Parsed"}'
+    mock_message_content.annotations = []
+    mock_message_content.logprobs = None
+
+    # Wrap the content in a message item and make it the response's only output.
+    mock_message_item = MagicMock()
+    mock_message_item.type = "message"
+    mock_message_item.content = [mock_message_content]
+    mock_response.output = [mock_message_item]
+    client.client.responses.create = AsyncMock(return_value=mock_response)
+
+    response = await client.get_response(
+        messages=[Message(role="user", text="Test message")],
+        options={"response_format": response_format},
+    )
+
+    assert response.response_id == "response_123"
+    assert response.value is not None
+    assert isinstance(response.value, dict)
+    assert response.value["answer"] == "Parsed"
+
+
 async def test_bad_request_error_non_content_filter() -> None:
    mock_openai_client = _make_mock_openai_client()
    project_client = MagicMock()
@@ -642,10 +683,9 @@ async def test_integration_options(
                assert isinstance(response.value, OutputStruct)
                assert "seattle" in response.value.location.lower()
            else:
-                assert response.value is None
-                response_value = json.loads(response.text)
-                assert isinstance(response_value, dict)
-                assert "location" in response_value
+                assert response.value is not None
+                assert isinstance(response.value, dict)
+                assert "location" in response.value


@pytest.mark.flaky
@@ -382,7 +382,10 @@ class OllamaChatClient(
            except Exception as ex:
                raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex

-            return self._parse_response_from_ollama(response)
+            return self._parse_response_from_ollama(
+                response,
+                response_format=validated_options.get("response_format"),
+            )

        return _get_response()

@@ -536,7 +539,12 @@ class OllamaChatClient(
            created_at=response.created_at,
        )

-    def _parse_response_from_ollama(self, response: OllamaChatResponse) -> ChatResponse:
+    def _parse_response_from_ollama(
+        self,
+        response: OllamaChatResponse,
+        *,
+        response_format: Any | None = None,
+    ) -> ChatResponse:
        contents = self._parse_contents_from_ollama(response)

        return ChatResponse(
@@ -547,6 +555,7 @@ class OllamaChatClient(
                input_token_count=response.prompt_eval_count,
                output_token_count=response.eval_count,
            ),
+            response_format=response_format,
        )

    def _parse_tool_calls_from_ollama(self, tool_calls: Sequence[OllamaMessage.ToolCall]) -> list[Content]:
@@ -248,6 +248,33 @@ async def test_cmc(
    assert result.text == "test"


+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_response_format_dict(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+) -> None:
+    """get_response with a dict response_format should expose the JSON message content as a dict value."""
+    # The patched AsyncClient.chat returns a response whose message content is a JSON object string.
+    mock_chat.return_value = OllamaChatResponse(
+        message=OllamaMessage(content='{"answer": "test"}', role="assistant"),
+        model="test",
+        eval_count=1,
+        prompt_eval_count=1,
+        created_at="2024-01-01T00:00:00Z",
+    )
+    chat_history.append(Message(text="hello world", role="system"))
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    # response_format is a schema dict rather than a Pydantic model type.
+    result = await ollama_client.get_response(
+        messages=chat_history,
+        options={"response_format": {"type": "object", "properties": {"answer": {"type": "string"}}}},
+    )
+
+    assert result.value is not None
+    assert isinstance(result.value, dict)
+    assert result.value["answer"] == "test"
+
+
@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
 async def test_cmc_reasoning(
    mock_chat: AsyncMock,
@@ -1912,9 +1912,7 @@ class RawOpenAIChatClient(  # type: ignore[misc]
            args["usage_details"] = usage_details
        if structured_response:
            args["value"] = structured_response
-        elif (response_format := options.get("response_format")) and isinstance(response_format, type):
-            # Only pass response_format to ChatResponse if it's a Pydantic model type,
-            # not a runtime JSON schema dict
+        elif response_format := options.get("response_format"):
            args["response_format"] = response_format
        # Set continuation_token when background operation is still in progress
        if response.status and response.status in ("in_progress", "queued"):
@@ -485,6 +485,46 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
        assert response.model == "test-model"


+async def test_response_format_dict_parse_path() -> None:
+    """Test get_response response_format parsing path for runtime JSON schema mappings."""
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+    # Runtime schema dict instead of a Pydantic model type.
+    response_format = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    mock_response = MagicMock()
+    mock_response.id = "response_123"
+    mock_response.model = "test-model"
+    mock_response.created_at = 1000000000
+    mock_response.metadata = {}
+    # NOTE(review): output_parsed is None, presumably so the value must come from the output text; confirm.
+    mock_response.output_parsed = None
+    mock_response.output = []
+    mock_response.usage = None
+    mock_response.finish_reason = None
+    mock_response.conversation = None
+    mock_response.status = "completed"
+
+    # A single output_text content item whose text is a JSON object.
+    mock_message_content = MagicMock()
+    mock_message_content.type = "output_text"
+    mock_message_content.text = '{"answer": "Parsed"}'
+    mock_message_content.annotations = []
+    mock_message_content.logprobs = None
+
+    # Wrap the content in a message item and make it the response's only output.
+    mock_message_item = MagicMock()
+    mock_message_item.type = "message"
+    mock_message_item.content = [mock_message_content]
+    mock_response.output = [mock_message_item]
+
+    # Patch responses.create only for the duration of the call under test.
+    with patch.object(client.client.responses, "create", return_value=mock_response):
+        response = await client.get_response(
+            messages=[Message(role="user", text="Test message")],
+            options={"response_format": response_format},
+        )
+
+    assert response.response_id == "response_123"
+    assert response.value is not None
+    assert isinstance(response.value, dict)
+    assert response.value["answer"] == "Parsed"
+
+
 async def test_bad_request_error_non_content_filter() -> None:
    """Test get_response BadRequestError without content_filter."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -3297,12 +3337,10 @@ async def test_integration_options(
                assert isinstance(response.value, OutputStruct)
                assert "seattle" in response.value.location.lower()
            else:
-                # Runtime JSON schema
-                assert response.value is None, "No structured output, can't parse any json."
-                response_value = json.loads(response.text)
-                assert isinstance(response_value, dict)
-                assert "location" in response_value
-                assert "seattle" in response_value["location"].lower()
+                assert response.value is not None
+                assert isinstance(response.value, dict)
+                assert "location" in response.value
+                assert "seattle" in response.value["location"].lower()


@pytest.mark.timeout(300)
@@ -2,7 +2,6 @@

 from __future__ import annotations

-import json
 import os
 from functools import wraps
 from pathlib import Path
@@ -322,11 +321,10 @@ async def test_integration_options(
                        assert isinstance(response.value, OutputStruct)
                        assert "seattle" in response.value.location.lower()
                    else:
-                        assert response.value is None
-                        response_value = json.loads(response.text)
-                        assert isinstance(response_value, dict)
-                        assert "location" in response_value
-                        assert "seattle" in response_value["location"].lower()
+                        assert response.value is not None
+                        assert isinstance(response.value, dict)
+                        assert "location" in response.value
+                        assert "seattle" in response.value["location"].lower()


@pytest.mark.flaky
@@ -1421,6 +1421,31 @@ def test_response_format_dict_passthrough(openai_unit_test_env: dict[str, str])
    assert prepared_options["response_format"] == custom_format


+def test_parse_response_with_dict_response_format(openai_unit_test_env: dict[str, str]) -> None:
+    """Chat completions should parse dict response_format values into response.value."""
+    client = OpenAIChatCompletionClient()
+    response = client._parse_response_from_openai(
+        ChatCompletion(
+            id="test-response",
+            object="chat.completion",
+            created=1234567890,
+            model="gpt-4o-mini",
+            choices=[
+                Choice(
+                    index=0,
+                    message=ChatCompletionMessage(role="assistant", content='{"answer": "Hello"}'),
+                    finish_reason="stop",
+                )
+            ],
+        ),
+        options={"response_format": {"type": "object", "properties": {"answer": {"type": "string"}}}},
+    )
+
+    assert response.value is not None
+    assert isinstance(response.value, dict)
+    assert response.value["answer"] == "Hello"
+
+
 def test_multiple_function_calls_in_single_message(
    openai_unit_test_env: dict[str, str],
 ) -> None:
@@ -1635,12 +1660,10 @@ async def test_integration_options(
                assert isinstance(response.value, OutputStruct)
                assert "seattle" in response.value.location.lower()
            else:
-                # Runtime JSON schema
-                assert response.value is None, "No structured output, can't parse any json."
-                response_value = json.loads(response.text)
-                assert isinstance(response_value, dict)
-                assert "location" in response_value
-                assert "seattle" in response_value["location"].lower()
+                assert response.value is not None
+                assert isinstance(response.value, dict)
+                assert "location" in response.value
+                assert "seattle" in response.value["location"].lower()


@pytest.mark.flaky
@@ -18,7 +18,7 @@ Prerequisites:
 - `pip install agent-framework-foundry agent-framework-declarative --pre`
 - Set the following environment variables in a .env file or your environment:
    - FOUNDRY_PROJECT_ENDPOINT
-    - AZURE_OPENAI_MODEL
+    - FOUNDRY_MODEL
 """


@@ -31,7 +31,7 @@ instructions: Specialized diagnostic and issue detection agent for systems with
 description: A agent that performs diagnostics on systems and can escalate issues when critical errors are detected.

 model:
-  id: =Env.AZURE_OPENAI_MODEL
+  id: =Env.FOUNDRY_MODEL
 """
    # create the agent from the yaml
    async with (
@@ -9,6 +9,20 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()

+"""
+This sample demonstrates creating an agent from a declarative YAML file specification.
+
+It uses an MCP server to connect to the Microsoft Learn content and a FoundryChatClient.
+
+The YAML also has some chat options set, such as temperature and topP.
+These options do not work with newer OpenAI models, so make sure to use a compatible model such as gpt-4o-mini.
+
+Environment variables:
+- FOUNDRY_PROJECT_ENDPOINT: The endpoint URL for the Foundry project.
+- FOUNDRY_MODEL: The model ID to use for the agent. Make sure it is compatible with the chat options specified in
+    the YAML, or remove the options.
+"""
+

 async def main():
    """Create an agent from a declarative yaml specification and run it."""
@@ -14,11 +14,8 @@ async def main():
    # get the path
    current_path = Path(__file__).parent
    yaml_path = current_path.parent.parent.parent.parent / "agent-samples" / "openai" / "OpenAIResponses.yaml"
-    # load the yaml from the path
-    with yaml_path.open("r") as f:
-        yaml_str = f.read()
    # create the agent from the yaml
-    agent = AgentFactory(safe_mode=False).create_agent_from_yaml(yaml_str)
+    agent = AgentFactory(safe_mode=False).create_agent_from_yaml_path(yaml_path)
    # use the agent
    response = await agent.run("Why is the sky blue, answer in Dutch?")
    # Use response.value with try/except for safe parsing