Python: Emit tool call events in GitHubCopilotAgent streaming (#4711)

* Emit tool call events in GitHubCopilotAgent streaming

_stream_updates now yields FunctionCallContent for TOOL_EXECUTION_START
and FunctionResultContent for TOOL_EXECUTION_COMPLETE events from the
Copilot SDK session. This enables DevUI and other consumers to display
tool calls during streaming agent execution. Previously only ASSISTANT_MESSAGE_DELTA, SESSION_IDLE, and SESSION_ERROR
were handled — tool execution events were silently dropped.

Signed-off-by: James Sturtevant <jsturtevant@gmail.com>

* Add some tests

Signed-off-by: James Sturtevant <jsturtevant@gmail.com>

* Respond to feedback

Signed-off-by: James Sturtevant <jsturtevant@gmail.com>

* Fix TOOL_EXECUTION_COMPLETE to use correct SDK types

- Read result text from session_events.Result.content (not ToolResult.text_result_for_llm)
- Read failure state from event.data.success/error (not result_obj.result_type/error)
- Handle ErrorClass.message and plain string errors
- Update tests to use session_events.Result and ErrorClass
- Add tests for string errors, success-with-error, and COMPLETE missing fields

Signed-off-by: James Sturtevant <jsturtevant@gmail.com>

---------

Signed-off-by: James Sturtevant <jsturtevant@gmail.com>
This commit is contained in:
James Sturtevant
2026-03-19 18:27:36 -07:00
committed by GitHub
Unverified
parent 0cd40f8354
commit b4c4f5094e
2 changed files with 408 additions and 1 deletions
@@ -20,7 +20,7 @@ from agent_framework import (
Message,
)
from agent_framework.exceptions import AgentException
from copilot.generated.session_events import Data, SessionEvent, SessionEventType
from copilot.generated.session_events import Data, ErrorClass, Result, SessionEvent, SessionEventType
from copilot.types import ToolInvocation, ToolResult
from agent_framework_github_copilot import GitHubCopilotAgent, GitHubCopilotOptions
@@ -463,6 +463,376 @@ class TestGitHubCopilotAgentRunStreaming:
assert agent._started is True # type: ignore
mock_client.start.assert_called_once()
async def test_run_streaming_tool_execution_start(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that TOOL_EXECUTION_START events produce function_call content."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_abc123"
tool_event_data.tool_name = "get_weather"
tool_event_data.arguments = {"city": "Seattle"}
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_START,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("What's the weather?", stream=True):
responses.append(update)
assert len(responses) == 1
assert responses[0].role == "assistant"
content = responses[0].contents[0]
assert content.type == "function_call"
assert content.call_id == "call_abc123"
assert content.name == "get_weather"
assert content.arguments == {"city": "Seattle"}
assert content.raw_representation is tool_event_data
async def test_run_streaming_tool_execution_complete(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that TOOL_EXECUTION_COMPLETE events produce function_result content."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_abc123"
tool_event_data.result = Result(content="Sunny, 72°F")
tool_event_data.success = True
tool_event_data.error = None
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("What's the weather?", stream=True):
responses.append(update)
assert len(responses) == 1
assert responses[0].role == "tool"
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == "call_abc123"
assert content.result == "Sunny, 72°F"
assert content.exception is None
assert content.raw_representation is tool_event_data
async def test_run_streaming_tool_execution_missing_fields(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that missing tool fields fall back to empty strings."""
tool_event_data = MagicMock(spec=[]) # No attributes
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_START,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_call"
assert content.call_id == ""
assert content.name == ""
assert content.arguments is None
async def test_run_streaming_tool_result_none(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that a tool result with None result object produces empty string."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_xyz"
tool_event_data.result = None
tool_event_data.success = True
tool_event_data.error = None
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == "call_xyz"
assert content.result == ""
assert content.exception is None
async def test_run_streaming_tool_execution_failure(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that a failed tool result surfaces the error as exception."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_fail"
tool_event_data.result = Result(content="Error: connection timeout")
tool_event_data.success = False
tool_event_data.error = ErrorClass(message="connection timeout")
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == "call_fail"
assert content.result == "Error: connection timeout"
assert content.exception == "connection timeout"
async def test_run_streaming_tool_execution_failure_string_error(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that a failed tool result with a string error is surfaced."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_fail2"
tool_event_data.result = Result(content="")
tool_event_data.success = False
tool_event_data.error = "something went wrong"
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == "call_fail2"
assert content.exception == "something went wrong"
async def test_run_streaming_tool_execution_success_with_error_field(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that a successful tool result with error field does not propagate exception."""
tool_event_data = MagicMock()
tool_event_data.tool_call_id = "call_ok"
tool_event_data.result = Result(content="partial result")
tool_event_data.success = True
tool_event_data.error = "some warning"
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == "call_ok"
assert content.result == "partial result"
assert content.exception is None
async def test_run_streaming_tool_complete_missing_fields(
self,
mock_client: MagicMock,
mock_session: MagicMock,
session_idle_event: SessionEvent,
) -> None:
"""Test that missing fields on TOOL_EXECUTION_COMPLETE fall back to defaults."""
tool_event_data = MagicMock(spec=[]) # No attributes
tool_event = SessionEvent(
data=tool_event_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
def mock_on(handler: Any) -> Any:
handler(tool_event)
handler(session_idle_event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("Hello", stream=True):
responses.append(update)
assert len(responses) == 1
content = responses[0].contents[0]
assert content.type == "function_result"
assert content.call_id == ""
assert content.result == ""
assert content.exception is None
async def test_run_streaming_tool_call_and_result_sequence(
self,
mock_client: MagicMock,
mock_session: MagicMock,
assistant_delta_event: SessionEvent,
session_idle_event: SessionEvent,
) -> None:
"""Test a full streaming sequence: text delta, tool call, tool result, text delta."""
# Tool call event
call_data = MagicMock()
call_data.tool_call_id = "call_001"
call_data.tool_name = "search"
call_data.arguments = {"query": "weather"}
tool_call_event = SessionEvent(
data=call_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_START,
)
# Tool result event
result_data = MagicMock()
result_data.tool_call_id = "call_001"
result_data.result = Result(content="72°F and sunny")
result_data.success = True
result_data.error = None
tool_result_event = SessionEvent(
data=result_data,
id=uuid4(),
timestamp=datetime.now(timezone.utc),
type=SessionEventType.TOOL_EXECUTION_COMPLETE,
)
# Final text delta
final_delta = create_session_event(
SessionEventType.ASSISTANT_MESSAGE_DELTA,
delta_content="The weather is sunny.",
message_id="msg-2",
)
events = [assistant_delta_event, tool_call_event, tool_result_event, final_delta, session_idle_event]
def mock_on(handler: Any) -> Any:
for event in events:
handler(event)
return lambda: None
mock_session.on = mock_on
agent = GitHubCopilotAgent(client=mock_client)
responses: list[AgentResponseUpdate] = []
async for update in agent.run("What's the weather?", stream=True):
responses.append(update)
assert len(responses) == 4
assert responses[0].role == "assistant"
assert responses[0].contents[0].type == "text"
assert responses[1].role == "assistant"
assert responses[1].contents[0].type == "function_call"
assert responses[2].role == "tool"
assert responses[2].contents[0].type == "function_result"
assert responses[3].role == "assistant"
assert responses[3].contents[0].type == "text"
class TestGitHubCopilotAgentSessionManagement:
"""Test cases for session management."""