diff --git a/python/packages/gemini/README.md b/python/packages/gemini/README.md index 80b7adba73..1dc789098b 100644 --- a/python/packages/gemini/README.md +++ b/python/packages/gemini/README.md @@ -10,6 +10,10 @@ pip install agent-framework-gemini --pre The Gemini integration enables Microsoft Agent Framework applications to call Google Gemini models with familiar chat abstractions, including streaming, tool/function calling, and structured output. +## Structured Output + +Gemini structured output can be configured in three ways: a Pydantic model in `response_format`, a JSON schema mapping in `response_format`, or a Gemini-specific `response_schema`. When both `response_format` and `response_schema` are supplied, the schema sent to Gemini comes from `response_schema`. Declarative agents that define `outputSchema` pass that schema through `response_format`. + ## Authentication The connector supports both `google-genai` authentication modes. diff --git a/python/packages/gemini/agent_framework_gemini/_chat_client.py b/python/packages/gemini/agent_framework_gemini/_chat_client.py index 0bc33c56a5..5ec5575fb7 100644 --- a/python/packages/gemini/agent_framework_gemini/_chat_client.py +++ b/python/packages/gemini/agent_framework_gemini/_chat_client.py @@ -109,8 +109,8 @@ class GeminiChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to or ``types.Tool`` objects returned by ``get_code_interpreter_tool``, ``get_web_search_tool``, ``get_mcp_tool``, ``get_file_search_tool``, or ``get_maps_grounding_tool``. tool_choice: How the model picks a tool. One of ``'auto'``, ``'none'``, or ``'required'``. - response_format: Pydantic model type for structured JSON output. The response text is - parsed into the model and exposed via ``ChatResponse.value``. + response_format: Pydantic model type or JSON schema mapping for structured JSON output. + The response text is parsed and exposed via ``ChatResponse.value``. instructions: Extra system-level instructions prepended to the system message. 
# ``type`` values that ``_is_json_schema_mapping`` accepts as JSON Schema types.
# Any other string (for example ``"json_schema"`` or ``"json_object"``) makes the
# mapping count as something other than a raw schema.
_JSON_SCHEMA_TYPES: frozenset[str] = frozenset(
    ("array", "boolean", "integer", "null", "number", "object", "string")
)

# A mapping must carry at least one of these keys before it is considered a
# candidate raw JSON Schema.
_JSON_SCHEMA_KEYWORDS: frozenset[str] = frozenset(
    (
        "$defs",
        "additionalProperties",
        "allOf",
        "anyOf",
        "enum",
        "items",
        "oneOf",
        "properties",
        "required",
        "type",
    )
)


@staticmethod
def _extract_response_schema(response_format: Any) -> dict[str, Any] | None:
    """Pull the schema to send to Gemini out of a mapping-shaped ``response_format``.

    The lookup order is:

    1. a nested ``"format"`` mapping, resolved recursively;
    2. ``response_format["json_schema"]["schema"]``;
    3. ``response_format["schema"]``;
    4. the mapping itself, when ``_is_json_schema_mapping`` accepts it.

    Args:
        response_format: The caller-supplied ``response_format`` option.

    Returns:
        A shallow ``dict`` copy of the located schema (an empty schema is kept),
        or ``None`` when ``response_format`` is not a mapping or holds no schema.
    """
    if not isinstance(response_format, Mapping):
        return None
    envelope = cast("Mapping[str, Any]", response_format)

    # A non-mapping "format" value (including a missing key) resolves to None
    # here, so the lookup falls through to the remaining shapes.
    inner = RawGeminiChatClient._extract_response_schema(envelope.get("format"))
    if inner is not None:
        return inner

    wrapper = envelope.get("json_schema")
    wrapped = cast("Mapping[str, Any]", wrapper).get("schema") if isinstance(wrapper, Mapping) else None
    # Prefer the schema inside "json_schema"; only consult the direct "schema"
    # key when that one is absent or not a mapping.
    found = wrapped if isinstance(wrapped, Mapping) else envelope.get("schema")
    if isinstance(found, Mapping):
        return dict(cast("Mapping[str, Any]", found))

    return dict(envelope) if RawGeminiChatClient._is_json_schema_mapping(envelope) else None


@staticmethod
def _is_json_schema_mapping(value: Mapping[str, Any]) -> bool:
    """Decide whether ``value`` looks like a JSON Schema instead of a response-format envelope.

    Args:
        value: The mapping to classify.

    Returns:
        ``False`` when none of ``_JSON_SCHEMA_KEYWORDS`` is present. Otherwise
        ``True`` when ``"type"`` is missing or ``None``, when it is a string found in
        ``_JSON_SCHEMA_TYPES``, or when it is a non-string sequence whose entries
        are all such strings; ``False`` for every other ``"type"`` value.
    """
    for keyword in _JSON_SCHEMA_KEYWORDS:
        if keyword in value:
            break
    else:
        return False

    declared = value.get("type")
    if declared is None:
        return True
    if isinstance(declared, str):
        return declared in _JSON_SCHEMA_TYPES
    if isinstance(declared, bytes) or not isinstance(declared, Sequence):
        return False

    # Union form, e.g. ["object", "null"]: every entry must be a known type name.
    for name in cast("Sequence[object]", declared):
        if not isinstance(name, str) or name not in _JSON_SCHEMA_TYPES:
            return False
    return True
async def test_response_format_mapping_populates_value_on_chat_response() -> None:
    """A JSON schema mapping given as response_format yields the parsed JSON in ChatResponse.value."""
    schema = {"type": "object", "properties": {"text": {"type": "string"}}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"text": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    result = await gemini.get_response(messages=[user_turn], options={"response_format": schema})

    assert result.value == {"text": "hello"}


async def test_response_format_raw_json_schema_added_to_config() -> None:
    """A bare JSON schema mapping in response_format sets the JSON mime type and the schema on the config."""
    schema = {
        "type": "object",
        "properties": {"answer": {"type": "string", "description": "The answer."}},
        "required": ["answer"],
    }
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": schema})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_agent_default_options_response_format_raw_schema_added_to_config() -> None:
    """A raw schema supplied through Agent default_options reaches the generated config."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    agent = Agent(client=gemini, default_options={"response_format": schema})

    await agent.run("Hi")

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_response_format_complex_raw_json_schema_preserved() -> None:
    """A nested schema with arrays, required lists and additionalProperties is forwarded unchanged."""
    citation_schema = {
        "type": "object",
        "properties": {
            "source": {"type": "string"},
            "confidence": {"type": "number"},
        },
        "required": ["source"],
        "additionalProperties": False,
    }
    schema = {
        "type": "object",
        "properties": {
            "answer": {"type": "string"},
            "citations": {"type": "array", "items": citation_schema},
        },
        "required": ["answer"],
        "additionalProperties": False,
    }
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "ok"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    long_prompt = "Summarize a long document while preserving citation metadata.\n" + ("context\n" * 128)
    user_turn = Message(role="user", contents=[Content.from_text(long_prompt)])

    await gemini.get_response(messages=[user_turn], options={"response_format": schema})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == schema


async def test_response_format_json_schema_envelope_added_to_config() -> None:
    """The schema nested under ``json_schema.schema`` of an envelope is the one placed on the config."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    envelope = {"type": "json_schema", "json_schema": {"name": "Answer", "schema": schema}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": envelope})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_response_format_format_envelope_added_to_config() -> None:
    """The schema nested under a ``format`` envelope is the one placed on the config."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    envelope = {"format": {"type": "json_schema", "name": "Answer", "schema": schema}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": envelope})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_response_format_direct_schema_key_added_to_config() -> None:
    """A mapping that carries the schema under a top-level ``schema`` key is accepted."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": {"schema": schema}})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_response_format_json_schema_envelope_preserves_empty_schema() -> None:
    """An empty inner schema is still forwarded rather than being discarded as falsy."""
    schema: dict[str, Any] = {}
    envelope = {"type": "json_schema", "json_schema": {"name": "AnyJson", "schema": schema}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text="{}")])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": envelope})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == schema


async def test_response_format_anyof_raw_schema_added_to_config() -> None:
    """A raw schema with no ``type`` but an ``anyOf`` keyword is recognised and forwarded."""
    schema = {"anyOf": [{"type": "string"}, {"type": "number"}]}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='"ok"')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": schema})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == schema


async def test_response_format_union_type_raw_schema_added_to_config() -> None:
    """A raw schema whose ``type`` is a list of type names is recognised and forwarded."""
    schema = {"type": ["object", "null"], "properties": {"answer": {"type": "string"}}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "hello"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": schema})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == schema


async def test_response_format_json_object_does_not_set_schema() -> None:
    """``{"type": "json_object"}`` sets the JSON mime type but leaves response_schema unset."""
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text="{}")])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": {"type": "json_object"}})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema is None


async def test_response_format_json_schema_without_inner_schema_does_not_set_schema() -> None:
    """A ``json_schema`` envelope lacking an inner schema is not treated as a raw schema itself."""
    envelope = {"type": "json_schema", "json_schema": {"name": "MissingSchema"}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text="{}")])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(messages=[user_turn], options={"response_format": envelope})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema is None


async def test_response_schema_takes_precedence_over_response_format_schema() -> None:
    """When both options carry a schema, the explicit response_schema is the one on the config."""
    response_format_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
    response_schema = {"type": "object", "properties": {"id": {"type": "integer"}}}
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text="{}")])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    await gemini.get_response(
        messages=[user_turn],
        options={"response_format": response_format_schema, "response_schema": response_schema},
    )

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == response_schema


async def test_response_format_raw_schema_kept_with_tools() -> None:
    """The schema is still placed on the config when a function tool is passed alongside it."""

    def calculator(expression: str) -> str:
        """Evaluate a simple expression."""
        return expression

    schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
    calc_tool = FunctionTool(name="calculator", func=calculator)
    gemini, sdk = _make_gemini_client()
    canned = _make_response([_make_part(text='{"answer": "4"}')])
    sdk.aio.models.generate_content = AsyncMock(return_value=canned)
    user_turn = Message(role="user", contents=[Content.from_text("What is 2 + 2?")])

    await gemini.get_response(messages=[user_turn], options={"tools": [calc_tool], "response_format": schema})

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content.call_args.kwargs["config"]
    assert sent.response_schema == schema
    assert sent.tools is not None
    assert sent.tools[0].function_declarations[0].name == "calculator"


async def test_streaming_response_format_raw_schema_added_to_config() -> None:
    """A streaming request also puts the JSON mime type and raw schema on the config."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    gemini, sdk = _make_gemini_client()
    chunks = [_make_response([_make_part(text='{"answer": "hello"}')], finish_reason="STOP")]
    sdk.aio.models.generate_content_stream = AsyncMock(return_value=_async_iter(chunks))
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    stream = gemini.get_response(messages=[user_turn], options={"response_format": schema}, stream=True)
    # Drain the stream so the underlying SDK call is actually made.
    async for _ in stream:
        pass

    sent: types.GenerateContentConfig = sdk.aio.models.generate_content_stream.call_args.kwargs["config"]
    assert sent.response_mime_type == "application/json"
    assert sent.response_schema == schema


async def test_streaming_response_format_mapping_populates_final_value() -> None:
    """After a stream is drained, the final response exposes the parsed JSON as its value."""
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    gemini, sdk = _make_gemini_client()
    chunks = [_make_response([_make_part(text='{"answer": "hello"}')], finish_reason="STOP")]
    sdk.aio.models.generate_content_stream = AsyncMock(return_value=_async_iter(chunks))
    user_turn = Message(role="user", contents=[Content.from_text("Hi")])

    stream = gemini.get_response(messages=[user_turn], options={"response_format": schema}, stream=True)
    async for _ in stream:
        pass

    final = await stream.get_final_response()
    assert final.value == {"answer": "hello"}