Python: Enforce approval_mode in Claude and GitHub Copilot agents (#5562)

* Python: Enforce approval_mode in Claude and GitHub Copilot agents

Tools declared with approval_mode="always_require" were bypassed by the
ClaudeAgent and GitHubCopilotAgent because their SDK-managed tool-calling
loops invoke FunctionTool.invoke() directly via package-supplied handlers,
skipping the standard _try_execute_function_calls approval gate.

Per discussion on #5494, the fix lives in the agents (not in FunctionTool):
any flag added to the tool itself can be spoofed by code with the same
level of access, so the security boundary is the agent that owns the
tool-calling loop.

- Add on_function_approval option to ClaudeAgentOptions and
  GitHubCopilotOptions. Callback receives a FunctionCallContent describing
  the pending call and returns bool (sync or async).
- Gate FunctionTool.invoke() inside each agent's existing tool-handler
  closure when approval_mode == "always_require". Default policy is deny;
  callbacks that raise also deny safely.
- Deny path returns a tool-error to the model (Claude: text content;
  Copilot: ToolResult(result_type="failure", error="approval_denied"))
  so the LLM can react gracefully instead of silently failing.
- Tests for both agents covering: deny by default, sync False, sync True,
  async True, callback-raises -> deny, no-op for never_require tools.
- Samples demonstrating sync, async, and deny-by-default flows for both
  agents.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address PR review: preserve empty arg dicts, reject runtime approval override

- _resolve_function_approval no longer collapses {} into None when building
  the FunctionCallContent passed to the callback (Claude + Copilot).
- Claude _apply_runtime_options and Copilot _run_impl/_stream_updates now
  raise ValueError if on_function_approval is supplied via per-run options,
  instead of silently ignoring it. Approval policy must be set at agent
  construction time.
- Drop unnecessary # type: ignore[attr-defined] on Content.name/.arguments
  in samples (Content is a unified class with both attributes defined).
- Add regression tests for the new runtime-options validation.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Log a warning when a tool requires approval but no approval callback is configured

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Eduard van Valkenburg
2026-05-01 16:11:28 +02:00
committed by GitHub
Unverified
parent 626b418622
commit c1cc6ee6df
6 changed files with 783 additions and 2 deletions
@@ -1483,6 +1483,183 @@ class TestGitHubCopilotAgentToolConversion:
assert result[1] == copilot_tool
class TestGitHubCopilotAgentFunctionApproval:
    """Tests that ``approval_mode='always_require'`` is enforced at the agent boundary.

    Every test converts a ``@tool`` function with ``_tool_to_copilot_tool`` and
    calls the resulting handler directly, so the assertions target the approval
    gate inside the handler rather than a full agent run.
    """

    async def test_handler_denies_when_no_callback_configured(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Approval-required tool must be denied without executing when no callback is set."""
        from agent_framework import tool

        # Records every real execution of the tool body; must stay empty on denial.
        invocations: list[Any] = []

        @tool(approval_mode="always_require")
        def dangerous(path: str) -> str:
            """A tool that requires human approval."""
            invocations.append(path)
            return f"deleted {path}"

        agent = GitHubCopilotAgent(client=mock_client)
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"path": "/critical"}))

        assert invocations == []
        assert result.result_type == "failure"
        assert result.error == "approval_denied"
        assert "no on_function_approval callback is configured" in result.text_result_for_llm

    async def test_handler_denies_when_callback_returns_false(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Falsy callback return value must deny the call and skip execution."""
        from agent_framework import Content, tool

        invocations: list[Any] = []
        # Captures what the callback was handed so its shape can be checked below.
        seen: list[Content] = []

        def deny(call: Content) -> bool:
            seen.append(call)
            return False

        @tool(approval_mode="always_require")
        def dangerous(path: str) -> str:
            """A tool that requires human approval."""
            invocations.append(path)
            return f"deleted {path}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": deny},
        )
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"path": "/critical"}))

        assert invocations == []
        # The callback is consulted exactly once and receives a function_call
        # content describing the pending invocation.
        assert len(seen) == 1
        assert seen[0].type == "function_call"
        assert seen[0].name == "dangerous"
        assert seen[0].arguments == {"path": "/critical"}
        assert result.result_type == "failure"
        assert result.error == "approval_denied"

    async def test_handler_executes_when_callback_returns_true(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Truthy callback return value must allow the tool to execute normally."""
        from agent_framework import Content, tool

        def approve(call: Content) -> bool:
            return True

        @tool(approval_mode="always_require")
        def guarded(x: int) -> str:
            """A tool that requires human approval."""
            return f"result={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(guarded)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 42}))

        assert result.result_type == "success"
        assert result.text_result_for_llm == "result=42"

    async def test_handler_supports_async_callback(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Async callback must be awaited and respected."""
        from agent_framework import Content, tool

        awaited: list[Content] = []

        async def approve(call: Content) -> bool:
            # Recording inside the coroutine body proves it was really awaited:
            # an un-awaited coroutine object is truthy and would otherwise
            # "approve" the call by accident.
            awaited.append(call)
            return True

        @tool(approval_mode="always_require")
        def guarded(x: int) -> str:
            """A tool that requires human approval."""
            return f"async={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(guarded)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 7}))

        assert len(awaited) == 1
        assert result.result_type == "success"
        assert result.text_result_for_llm == "async=7"

    async def test_callback_failure_denies_safely(
        self,
        mock_client: MagicMock,
    ) -> None:
        """A callback that raises must result in denial, not in tool execution."""
        from agent_framework import Content, tool

        invocations: list[Any] = []

        def boom(call: Content) -> bool:
            raise RuntimeError("nope")

        @tool(approval_mode="always_require")
        def dangerous(x: int) -> str:
            """A tool that requires human approval."""
            invocations.append(x)
            return f"x={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": boom},
        )
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 1}))

        assert invocations == []
        assert result.result_type == "failure"
        assert result.error == "approval_denied"

    async def test_handler_does_not_invoke_callback_for_never_require(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Tools without approval_mode='always_require' must not trigger the callback."""
        from agent_framework import Content, tool

        callback_calls: list[Any] = []

        def approve(call: Content) -> bool:
            callback_calls.append(call)
            return True

        @tool
        def safe(x: int) -> str:
            """A tool that does not require approval."""
            return f"safe={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(safe)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 5}))

        # The callback is configured but must stay untouched for this tool.
        assert callback_calls == []
        assert result.result_type == "success"
        assert result.text_result_for_llm == "safe=5"
class TestGitHubCopilotAgentErrorHandling:
"""Test cases for error handling."""
@@ -2128,3 +2305,16 @@ class TestGitHubCopilotAgentContextProviders:
await agent.run("Hello", session=session, options={"timeout": 120})
assert observed_options.get("timeout") == 120
async def test_runtime_on_function_approval_rejected(self, mock_client: MagicMock) -> None:
    """A per-run ``on_function_approval`` option must raise ValueError, never be dropped silently."""

    def approve_everything(_call: object) -> bool:
        return True

    copilot_agent = GitHubCopilotAgent(client=mock_client)

    with pytest.raises(ValueError, match="on_function_approval"):
        await copilot_agent.run("hello", options={"on_function_approval": approve_everything})
async def test_runtime_on_function_approval_rejected_streaming(self, mock_client: MagicMock) -> None:
    """The streaming path must also raise ValueError for a per-run ``on_function_approval``."""

    def approve_everything(_call: object) -> bool:
        return True

    copilot_agent = GitHubCopilotAgent(client=mock_client)

    with pytest.raises(ValueError, match="on_function_approval"):
        # Draining the stream matters: the error may only surface on iteration.
        async for _update in copilot_agent.run(
            "hello",
            stream=True,
            options={"on_function_approval": approve_everything},
        ):
            pass