mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Enforce approval_mode in Claude and GitHub Copilot agents (#5562)
* Python: Enforce approval_mode in Claude and GitHub Copilot agents

  Tools declared with approval_mode="always_require" were bypassed by the ClaudeAgent and GitHubCopilotAgent because their SDK-managed tool-calling loops invoke FunctionTool.invoke() directly via package-supplied handlers, skipping the standard _try_execute_function_calls approval gate.

  Per discussion on #5494, the fix lives in the agents (not in FunctionTool): any flag added to the tool itself can be spoofed by code with the same level of access, so the security boundary is the agent that owns the tool-calling loop.

  - Add on_function_approval option to ClaudeAgentOptions and GitHubCopilotOptions. Callback receives a FunctionCallContent describing the pending call and returns bool (sync or async).
  - Gate FunctionTool.invoke() inside each agent's existing tool-handler closure when approval_mode == "always_require". Default policy is deny; callbacks that raise also deny safely.
  - Deny path returns a tool-error to the model (Claude: text content; Copilot: ToolResult(result_type="failure", error="approval_denied")) so the LLM can react gracefully instead of silently failing.
  - Tests for both agents covering: deny by default, sync False, sync True, async True, callback-raises -> deny, no-op for never_require tools.
  - Samples demonstrating sync, async, and deny-by-default flows for both agents.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address PR review: preserve empty arg dicts, reject runtime approval override

  - _resolve_function_approval no longer collapses {} into None when building the FunctionCallContent passed to the callback (Claude + Copilot).
  - Claude _apply_runtime_options and Copilot _run_impl/_stream_updates now raise ValueError if on_function_approval is supplied via per-run options, instead of silently ignoring it. Approval policy must be set at agent construction time.
  - Drop unnecessary # type: ignore[attr-defined] on Content.name/.arguments in samples (Content is a unified class with both attributes defined).
  - Add regression tests for the new runtime-options validation.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* warning when non callback handler and approval needed

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
626b418622
commit
c1cc6ee6df
@@ -1483,6 +1483,183 @@ class TestGitHubCopilotAgentToolConversion:
|
||||
assert result[1] == copilot_tool
|
||||
|
||||
|
||||
class TestGitHubCopilotAgentFunctionApproval:
    """Tests that ``approval_mode='always_require'`` is enforced at the agent boundary.

    Every test converts a ``@tool``-decorated function with
    ``GitHubCopilotAgent._tool_to_copilot_tool`` and awaits the resulting
    handler directly with a ``ToolInvocation``, then checks both the returned
    result and whether the wrapped function / approval callback actually ran.
    """

    async def test_handler_denies_when_no_callback_configured(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Approval-required tool must be denied without executing when no callback is set."""
        from agent_framework import tool

        invocations: list[Any] = []

        @tool(approval_mode="always_require")
        def dangerous(path: str) -> str:
            """A tool that requires human approval."""
            invocations.append(path)
            return f"deleted {path}"

        # No ``on_function_approval`` is supplied, so the default policy applies.
        agent = GitHubCopilotAgent(client=mock_client)
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"path": "/critical"}))

        # The wrapped function must never have run.
        assert invocations == []
        assert result.result_type == "failure"
        assert result.error == "approval_denied"
        assert "no on_function_approval callback is configured" in result.text_result_for_llm

    async def test_handler_denies_when_callback_returns_false(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Falsy callback return value must deny the call and skip execution."""
        from agent_framework import Content, tool

        invocations: list[Any] = []
        seen: list[Content] = []

        def deny(call: Content) -> bool:
            seen.append(call)
            return False

        @tool(approval_mode="always_require")
        def dangerous(path: str) -> str:
            """A tool that requires human approval."""
            invocations.append(path)
            return f"deleted {path}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": deny},
        )
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"path": "/critical"}))

        assert invocations == []
        # The callback is consulted exactly once and receives a description of
        # the pending call (type, tool name and the raw arguments).
        assert len(seen) == 1
        assert seen[0].type == "function_call"
        assert seen[0].name == "dangerous"
        assert seen[0].arguments == {"path": "/critical"}
        assert result.result_type == "failure"
        assert result.error == "approval_denied"

    async def test_handler_executes_when_callback_returns_true(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Truthy callback return value must allow the tool to execute normally."""
        from agent_framework import Content, tool

        seen: list[Content] = []

        def approve(call: Content) -> bool:
            seen.append(call)
            return True

        @tool(approval_mode="always_require")
        def guarded(x: int) -> str:
            """A tool that requires human approval."""
            return f"result={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(guarded)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 42}))

        # Success alone does not prove the gate ran; the callback must have been consulted.
        assert len(seen) == 1
        assert result.result_type == "success"
        assert result.text_result_for_llm == "result=42"

    async def test_handler_supports_async_callback(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Async callback must be awaited and respected."""
        from agent_framework import Content, tool

        awaited: list[Content] = []

        async def approve(call: Content) -> bool:
            # This body only runs if the coroutine is actually awaited.
            awaited.append(call)
            return True

        @tool(approval_mode="always_require")
        def guarded(x: int) -> str:
            """A tool that requires human approval."""
            return f"async={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(guarded)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 7}))

        # An un-awaited coroutine object is truthy, so a success result by itself
        # would not distinguish "awaited and approved" from "never awaited".
        assert len(awaited) == 1
        assert result.result_type == "success"
        assert result.text_result_for_llm == "async=7"

    async def test_callback_failure_denies_safely(
        self,
        mock_client: MagicMock,
    ) -> None:
        """A callback that raises must result in denial, not in tool execution."""
        from agent_framework import Content, tool

        invocations: list[Any] = []

        def boom(call: Content) -> bool:
            raise RuntimeError("nope")

        @tool(approval_mode="always_require")
        def dangerous(x: int) -> str:
            """A tool that requires human approval."""
            invocations.append(x)
            return f"x={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": boom},
        )
        copilot_tool = agent._tool_to_copilot_tool(dangerous)  # type: ignore[reportPrivateUsage]

        # The handler is expected to absorb the callback's exception and report a denial.
        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 1}))

        assert invocations == []
        assert result.result_type == "failure"
        assert result.error == "approval_denied"

    async def test_handler_does_not_invoke_callback_for_never_require(
        self,
        mock_client: MagicMock,
    ) -> None:
        """Tools without approval_mode='always_require' must not trigger the callback."""
        from agent_framework import Content, tool

        callback_calls: list[Any] = []

        def approve(call: Content) -> bool:
            callback_calls.append(call)
            return True

        # Bare ``@tool``: no approval_mode is requested for this function.
        @tool
        def safe(x: int) -> str:
            """A tool that does not require approval."""
            return f"safe={x}"

        agent = GitHubCopilotAgent(
            client=mock_client,
            default_options={"on_function_approval": approve},
        )
        copilot_tool = agent._tool_to_copilot_tool(safe)  # type: ignore[reportPrivateUsage]

        result = await copilot_tool.handler(ToolInvocation(arguments={"x": 5}))

        assert callback_calls == []
        assert result.result_type == "success"
        assert result.text_result_for_llm == "safe=5"
|
||||
|
||||
|
||||
class TestGitHubCopilotAgentErrorHandling:
|
||||
"""Test cases for error handling."""
|
||||
|
||||
@@ -2128,3 +2305,16 @@ class TestGitHubCopilotAgentContextProviders:
|
||||
await agent.run("Hello", session=session, options={"timeout": 120})
|
||||
|
||||
assert observed_options.get("timeout") == 120
|
||||
|
||||
async def test_runtime_on_function_approval_rejected(self, mock_client: MagicMock) -> None:
    """A per-run ``on_function_approval`` option must raise ``ValueError``, not be dropped silently."""
    # The agent itself is built without any approval callback.
    copilot_agent = GitHubCopilotAgent(client=mock_client)

    with pytest.raises(ValueError, match="on_function_approval"):
        await copilot_agent.run(
            "hello",
            options={"on_function_approval": lambda _c: True},
        )
|
||||
|
||||
async def test_runtime_on_function_approval_rejected_streaming(self, mock_client: MagicMock) -> None:
    """The streaming path must also raise ``ValueError`` for a per-run ``on_function_approval``."""
    copilot_agent = GitHubCopilotAgent(client=mock_client)

    # Both creating the stream and draining it stay inside the ``raises`` block,
    # so the error is caught whichever of the two steps produces it.
    with pytest.raises(ValueError, match="on_function_approval"):
        updates = copilot_agent.run(
            "hello",
            stream=True,
            options={"on_function_approval": lambda _c: True},
        )
        async for _ in updates:
            pass
|
||||
|
||||
Reference in New Issue
Block a user