Python (fix:gemini): make Gemini honor declarative outputSchema, not just JSON mode (#5893)

* fix(gemini): preserve schema response_format

* fix(gemini): satisfy pyright strict in response schema extraction

  Cast Any-narrowed mappings to Mapping[str, Any] in the structured-output schema helpers so pyright strict no longer reports partially-unknown member, argument, and variable types. Pass response_format["format"] straight into the recursive extractor, which already guards non-mapping inputs. No behavior change.

* fix(gemini): use Sequence[object] cast to satisfy both mypy and pyright

  The Sequence[Any] cast that pyright strict needs in order to know the loop element type is reported as a redundant-cast by mypy, which already narrows the isinstance branch to Sequence[Any]. Cast to Sequence[object] instead: pyright gets a fully known element type and mypy no longer sees an identical-type cast. No behavior change.

---------

Co-authored-by: Evan Mattson <evan.mattson@microsoft.com>
2026-06-16 21:04:09 +08:00 · 2026-06-05 23:17:51 +08:00
parent bf4ad48cf2
commit d5335fbeae
3 changed files with 370 additions and 5 deletions
@@ -10,6 +10,10 @@ pip install agent-framework-gemini --pre

 The Gemini integration enables Microsoft Agent Framework applications to call Google Gemini models with familiar chat abstractions, including streaming, tool/function calling, and structured output.

+## Structured Output
+
+Gemini structured output can be configured in one of three ways: a Pydantic model in `response_format`, a JSON schema mapping in `response_format`, or a Gemini-specific `response_schema`. When both `response_format` and `response_schema` are supplied, `response_schema` takes precedence. Declarative agents that define `outputSchema` pass that schema through `response_format`.
+
 ## Authentication

 The connector supports both `google-genai` authentication modes.
@@ -109,8 +109,8 @@ class GeminiChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
            or ``types.Tool`` objects returned by ``get_code_interpreter_tool``, ``get_web_search_tool``,
            ``get_mcp_tool``, ``get_file_search_tool``, or ``get_maps_grounding_tool``.
        tool_choice: How the model picks a tool. One of ``'auto'``, ``'none'``, or ``'required'``.
-        response_format: Pydantic model type for structured JSON output. The response text is
-            parsed into the model and exposed via ``ChatResponse.value``.
+        response_format: Pydantic model type or JSON schema mapping for structured JSON output.
+            The response text is parsed and exposed via ``ChatResponse.value``.
        instructions: Extra system-level instructions prepended to the system message.

    Not supported, and passing these raises a type error:
@@ -255,6 +255,29 @@ _OPTION_CONSUMED_KEYS: frozenset[str] = frozenset({

 _OPTION_EXCLUDE_KEYS: frozenset[str] = _OPTION_EXPLICIT_KEYS | _OPTION_CONSUMED_KEYS

+_JSON_SCHEMA_TYPES: frozenset[str] = frozenset({  # "type" values accepted when detecting a raw JSON Schema
+    "array",
+    "boolean",
+    "integer",
+    "null",
+    "number",
+    "object",
+    "string",
+})
+
+_JSON_SCHEMA_KEYWORDS: frozenset[str] = frozenset({  # a mapping needs at least one of these keys to count as a schema
+    "$defs",
+    "additionalProperties",
+    "allOf",
+    "anyOf",
+    "enum",
+    "items",
+    "oneOf",
+    "properties",
+    "required",
+    "type",
+})
+
 _FINISH_REASON_MAP: dict[str, FinishReasonLiteral] = {
    "STOP": "stop",
    "MAX_TOKENS": "length",
@@ -747,9 +770,13 @@ class RawGeminiChatClient(
                continue
            kwargs[_OPTION_TRANSLATIONS.get(key, key)] = value

-        if options.get("response_format") or options.get("response_schema"):
+        response_format = options.get("response_format")
+        response_schema = options.get("response_schema")
+        if response_format is not None or response_schema is not None:
            kwargs["response_mime_type"] = "application/json"
-        if schema := options.get("response_schema"):
+        if response_schema is not None:
+            kwargs["response_schema"] = response_schema
+        elif (schema := self._extract_response_schema(response_format)) is not None:
            kwargs["response_schema"] = schema
        if tools := self._prepare_tools(options):
            kwargs["tools"] = tools
@@ -762,6 +789,48 @@ class RawGeminiChatClient(

        return types.GenerateContentConfig(**kwargs)

+    @staticmethod
+    def _extract_response_schema(response_format: Any) -> dict[str, Any] | None:
+        """Extract a Gemini response schema from supported mapping response_format shapes."""
+        if not isinstance(response_format, Mapping):  # anything that is not a mapping yields no schema
+            return None
+        mapping = cast("Mapping[str, Any]", response_format)  # typing-only narrowing; no runtime effect
+
+        if (nested := RawGeminiChatClient._extract_response_schema(mapping.get("format"))) is not None:
+            return nested  # shape 1: {"format": <any supported shape>}, resolved recursively
+
+        json_schema = mapping.get("json_schema")  # shape 2: {"json_schema": {"schema": {...}}}
+        if isinstance(json_schema, Mapping):
+            schema = cast("Mapping[str, Any]", json_schema).get("schema")
+            if isinstance(schema, Mapping):  # type check, not truthiness, so an empty {} schema is kept
+                return dict(cast("Mapping[str, Any]", schema))  # shallow copy as a plain dict
+
+        schema = mapping.get("schema")  # shape 3: {"schema": {...}}
+        if isinstance(schema, Mapping):
+            return dict(cast("Mapping[str, Any]", schema))
+
+        if RawGeminiChatClient._is_json_schema_mapping(mapping):  # shape 4: mapping itself is a raw JSON Schema
+            return dict(mapping)
+
+        return None  # none of the supported shapes matched
+
+    @staticmethod
+    def _is_json_schema_mapping(value: Mapping[str, Any]) -> bool:
+        """Return True when a mapping appears to be a JSON Schema rather than a response-format envelope."""
+        if not any(keyword in value for keyword in _JSON_SCHEMA_KEYWORDS):
+            return False  # no recognised JSON Schema keyword present
+
+        schema_type = value.get("type")
+        if schema_type is None:  # "type" absent or None: the keyword match above decides
+            return True
+        if isinstance(schema_type, str):
+            return schema_type in _JSON_SCHEMA_TYPES  # rejects other "type" strings such as "json_object"
+        if isinstance(schema_type, Sequence) and not isinstance(schema_type, (str, bytes)):
+            entries = cast("Sequence[object]", schema_type)  # list form of "type", e.g. ["object", "null"]
+            return all(isinstance(item, str) and item in _JSON_SCHEMA_TYPES for item in entries)
+
+        return False  # "type" is not None, a str, or a non-str/bytes sequence
+
    def _prepare_tools(self, options: Mapping[str, Any]) -> list[types.Tool] | None:
        """Translate the framework tool list into Gemini API tool objects.

@@ -9,7 +9,7 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-from agent_framework import Content, FunctionTool, Message
+from agent_framework import Agent, Content, FunctionTool, Message
 from google.genai import types
 from pydantic import BaseModel

@@ -915,6 +915,20 @@ async def test_response_format_populates_value_on_chat_response() -> None:
    assert response.value == Reply(text="hello")


+async def test_response_format_mapping_populates_value_on_chat_response() -> None:
+    """When response_format is a JSON schema mapping, ChatResponse.value must parse the response text."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"text": "hello"}')]))
+    schema = {"type": "object", "properties": {"text": {"type": "string"}}}
+
+    response = await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+    )
+
+    assert response.value == {"text": "hello"}
+
+
 async def test_response_schema_added_to_config() -> None:
    """Sets both response_mime_type and the raw schema on the config when response_schema is given."""
    client, mock = _make_gemini_client()
@@ -931,6 +945,284 @@ async def test_response_schema_added_to_config() -> None:
    assert config.response_schema == schema


+async def test_response_format_raw_json_schema_added_to_config() -> None:
+    """For declarative outputSchema, response_format may already be a raw JSON schema mapping."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {
+        "type": "object",
+        "properties": {"answer": {"type": "string", "description": "The answer."}},
+        "required": ["answer"],
+    }
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_agent_default_options_response_format_raw_schema_added_to_config() -> None:
+    """Agent default_options is the path used by declarative outputSchema."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
+    agent = Agent(client=client, default_options={"response_format": schema})
+
+    await agent.run("Hi")
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_response_format_complex_raw_json_schema_preserved() -> None:
+    """Nested declarative schemas should be forwarded without losing shape or constraints."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "ok"}')]))
+    schema = {
+        "type": "object",
+        "properties": {
+            "answer": {"type": "string"},
+            "citations": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "source": {"type": "string"},
+                        "confidence": {"type": "number"},
+                    },
+                    "required": ["source"],
+                    "additionalProperties": False,
+                },
+            },
+        },
+        "required": ["answer"],
+        "additionalProperties": False,
+    }
+
+    await client.get_response(
+        messages=[
+            Message(
+                role="user",
+                contents=[
+                    Content.from_text(
+                        "Summarize a long document while preserving citation metadata.\n" + ("context\n" * 128)
+                    )
+                ],
+            )
+        ],
+        options={"response_format": schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == schema
+
+
+async def test_response_format_json_schema_envelope_added_to_config() -> None:
+    """OpenAI-style json_schema envelopes should still provide Gemini with the inner schema."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"type": "json_schema", "json_schema": {"name": "Answer", "schema": schema}}},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_response_format_format_envelope_added_to_config() -> None:
+    """Responses-style format envelopes should also provide Gemini with the nested schema."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"format": {"type": "json_schema", "name": "Answer", "schema": schema}}},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_response_format_direct_schema_key_added_to_config() -> None:
+    """Provider-normalized mappings with a direct schema key should be accepted."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"schema": schema}},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_response_format_json_schema_envelope_preserves_empty_schema() -> None:
+    """An explicitly empty JSON schema is still a schema and should not be dropped as falsy."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text="{}")]))
+    schema: dict[str, Any] = {}  # falsy, so a truthiness check would have dropped it
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"type": "json_schema", "json_schema": {"name": "AnyJson", "schema": schema}}},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == schema  # the empty schema must still reach the config
+
+
+async def test_response_format_anyof_raw_schema_added_to_config() -> None:
+    """Raw schemas without a type should still be recognized when they use JSON Schema keywords."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='"ok"')]))
+    schema = {"anyOf": [{"type": "string"}, {"type": "number"}]}  # no top-level "type"; only "anyOf" marks it
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == schema
+
+
+async def test_response_format_union_type_raw_schema_added_to_config() -> None:
+    """JSON Schema union type arrays should be treated as raw schemas."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "hello"}')]))
+    schema = {"type": ["object", "null"], "properties": {"answer": {"type": "string"}}}
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == schema  # every entry of the "type" list is a known JSON Schema type
+
+
+async def test_response_format_json_object_does_not_set_schema() -> None:
+    """A JSON-object response_format requests JSON output but is not itself a Gemini response schema."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text="{}")]))
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"type": "json_object"}},  # "json_object" is not in _JSON_SCHEMA_TYPES
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"  # any non-None response_format enables JSON mode
+    assert config.response_schema is None  # the envelope is not treated as a schema
+
+
+async def test_response_format_json_schema_without_inner_schema_does_not_set_schema() -> None:
+    """A json_schema envelope without a schema should not be mistaken for a raw JSON schema."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text="{}")]))
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": {"type": "json_schema", "json_schema": {"name": "MissingSchema"}}},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"  # JSON mode is still requested
+    assert config.response_schema is None  # no inner "schema", and "json_schema" is not a JSON Schema type
+
+
+async def test_response_schema_takes_precedence_over_response_format_schema() -> None:
+    """An explicit Gemini response_schema should win when both schema options are present."""
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text="{}")]))
+    response_format_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
+    response_schema = {"type": "object", "properties": {"id": {"type": "integer"}}}  # differs on purpose
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": response_format_schema, "response_schema": response_schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == response_schema  # the response_format schema is only a fallback
+
+
+async def test_response_format_raw_schema_kept_with_tools() -> None:
+    """Structured output must still reach Gemini when function tools are present."""
+
+    def calculator(expression: str) -> str:
+        """Evaluate a simple expression."""
+        return expression
+
+    tool = FunctionTool(name="calculator", func=calculator)
+    client, mock = _make_gemini_client()
+    mock.aio.models.generate_content = AsyncMock(return_value=_make_response([_make_part(text='{"answer": "4"}')]))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
+
+    await client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("What is 2 + 2?")])],
+        options={"tools": [tool], "response_format": schema},
+    )
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content.call_args.kwargs["config"]
+    assert config.response_schema == schema
+    assert config.tools is not None
+    assert config.tools[0].function_declarations[0].name == "calculator"
+
+
+async def test_streaming_response_format_raw_schema_added_to_config() -> None:
+    """Streaming requests use the same config path and should also forward raw schema mappings."""
+    client, mock = _make_gemini_client()
+    chunks = [_make_response([_make_part(text='{"answer": "hello"}')], finish_reason="STOP")]
+    mock.aio.models.generate_content_stream = AsyncMock(return_value=_async_iter(chunks))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    stream = client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+        stream=True,
+    )
+    async for _ in stream:
+        pass
+
+    config: types.GenerateContentConfig = mock.aio.models.generate_content_stream.call_args.kwargs["config"]
+    assert config.response_mime_type == "application/json"
+    assert config.response_schema == schema
+
+
+async def test_streaming_response_format_mapping_populates_final_value() -> None:
+    """Streaming responses should preserve mapping response_format for final value parsing."""
+    client, mock = _make_gemini_client()
+    chunks = [_make_response([_make_part(text='{"answer": "hello"}')], finish_reason="STOP")]
+    mock.aio.models.generate_content_stream = AsyncMock(return_value=_async_iter(chunks))
+    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
+
+    stream = client.get_response(
+        messages=[Message(role="user", contents=[Content.from_text("Hi")])],
+        options={"response_format": schema},
+        stream=True,
+    )
+    async for _ in stream:
+        pass
+
+    final = await stream.get_final_response()
+    assert final.value == {"answer": "hello"}
+
+
 async def test_streaming_response_format_passed_to_build_response_stream() -> None:
    """Verifies that response_format is forwarded to _build_response_stream when streaming
    so that structured output parsing works correctly on the final assembled response.