Merge branch 'main' into feature/python-foundry-hosted-agent-vnext

This commit is contained in:
Tao Chen
2026-04-20 18:35:09 -07:00
committed by GitHub
Unverified
110 changed files with 11365 additions and 402 deletions
+2
View File
@@ -31,6 +31,7 @@
"azuredocs",
"azurefunctions",
"boto",
"codeact",
"contentvector",
"contoso",
"datamodel",
@@ -46,6 +47,7 @@
"hnsw",
"httpx",
"huggingface",
"hyperlight",
"Instrumentor",
"logit",
"logprobs",
+1
View File
@@ -33,6 +33,7 @@ Status is grouped into these buckets:
| `agent-framework-foundry-local` | `python/packages/foundry_local` | `beta` |
| `agent-framework-gemini` | `python/packages/gemini` | `alpha` |
| `agent-framework-github-copilot` | `python/packages/github_copilot` | `beta` |
| `agent-framework-hyperlight` | `python/packages/hyperlight` | `alpha` |
| `agent-framework-lab` | `python/packages/lab` | `beta` |
| `agent-framework-mem0` | `python/packages/mem0` | `beta` |
| `agent-framework-ollama` | `python/packages/ollama` | `beta` |
@@ -244,7 +244,8 @@ class CopilotStudioAgent(BaseAgent):
"""Non-streaming implementation of run."""
if not session:
session = self.create_session()
session.service_session_id = await self._start_new_conversation()
if not session.service_session_id:
session.service_session_id = await self._start_new_conversation()
input_messages = normalize_messages(messages)
@@ -271,7 +272,8 @@ class CopilotStudioAgent(BaseAgent):
nonlocal session
if not session:
session = self.create_session()
session.service_session_id = await self._start_new_conversation()
if not session.service_session_id:
session.service_session_id = await self._start_new_conversation()
input_messages = normalize_messages(messages)
@@ -245,6 +245,47 @@ class TestCopilotStudioAgent:
assert response_count == 1
assert session.service_session_id == "test-conversation-id"
async def test_run_reuses_existing_conversation(
    self, mock_copilot_client: MagicMock, mock_activity: MagicMock
) -> None:
    """A session that already carries a conversation ID must not start a new conversation."""
    existing_id = "existing-conversation-id"
    mock_copilot_client.ask_question.return_value = create_async_generator([mock_activity])

    session = AgentSession()
    session.service_session_id = existing_id
    agent = CopilotStudioAgent(client=mock_copilot_client)

    response = await agent.run("test message", session=session)

    assert isinstance(response, AgentResponse)
    # The stored ID is left untouched and is the one sent along with the question.
    assert session.service_session_id == existing_id
    mock_copilot_client.start_conversation.assert_not_called()
    mock_copilot_client.ask_question.assert_called_once_with("test message", existing_id)
async def test_run_streaming_reuses_existing_conversation(self, mock_copilot_client: MagicMock) -> None:
    """Streaming runs keep using the conversation ID already stored on the session."""
    existing_id = "existing-conversation-id"
    session = AgentSession()
    session.service_session_id = existing_id

    # A single "typing" activity is all the mocked client will stream back.
    streamed_activity = MagicMock()
    streamed_activity.type = "typing"
    streamed_activity.text = "Streaming response"
    streamed_activity.id = "test-typing-id"
    streamed_activity.from_property.name = "Test Bot"
    mock_copilot_client.ask_question.return_value = create_async_generator([streamed_activity])

    agent = CopilotStudioAgent(client=mock_copilot_client)
    updates = [update async for update in agent.run("test message", session=session, stream=True)]

    assert all(isinstance(update, AgentResponseUpdate) for update in updates)
    assert len(updates) == 1
    assert session.service_session_id == existing_id
    mock_copilot_client.start_conversation.assert_not_called()
    mock_copilot_client.ask_question.assert_called_once_with("test message", existing_id)
async def test_run_streaming_no_typing_activity(self, mock_copilot_client: MagicMock) -> None:
"""Test run(stream=True) method with non-typing activity."""
agent = CopilotStudioAgent(client=mock_copilot_client)
@@ -49,6 +49,7 @@ class ExperimentalFeature(str, Enum):
EVALS = "EVALS"
FILE_HISTORY = "FILE_HISTORY"
SKILLS = "SKILLS"
TOOLBOXES = "TOOLBOXES"
class ReleaseCandidateFeature(str, Enum):
+33 -4
View File
@@ -12,6 +12,7 @@ from collections.abc import (
AsyncIterable,
Awaitable,
Callable,
Iterable,
Mapping,
Sequence,
)
@@ -89,6 +90,7 @@ logger = logging.getLogger("agent_framework")
DEFAULT_MAX_ITERATIONS: Final[int] = 40
DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST: Final[int] = 3
SHELL_TOOL_KIND_VALUE: Final[str] = "shell"
ApprovalMode: TypeAlias = Literal["always_require", "never_require"]
ChatClientT = TypeVar("ChatClientT", bound="SupportsChatGetResponse[Any]")
ResponseModelBoundT = TypeVar("ResponseModelBoundT", bound=BaseModel)
@@ -270,7 +272,7 @@ class FunctionTool(SerializationMixin):
*,
name: str,
description: str = "",
approval_mode: Literal["always_require", "never_require"] | None = None,
approval_mode: ApprovalMode | None = None,
kind: str | None = None,
max_invocations: int | None = None,
max_invocation_exceptions: int | None = None,
@@ -858,6 +860,15 @@ def normalize_tools(
Returns:
A normalized list where callable inputs are converted to ``FunctionTool``
using :func:`tool`, and existing tool objects are passed through unchanged.
Tool-collection wrappers are flattened in two forms:
- non-tool, non-callable iterables
- mapping-like objects that expose a ``.tools`` collection (for example
``ToolboxVersionObject`` from azure-ai-projects)
This lets callers write ``tools=[toolbox, my_func]`` and have the
toolbox's contents spread in alongside individual tools.
"""
if not tools:
return []
@@ -882,6 +893,24 @@ def normalize_tools(
if callable(tool_item): # type: ignore[reportUnknownArgumentType]
normalized.append(tool(tool_item))
continue
# Mapping-like tool collections (for example ToolboxVersionObject) are
# not flattened by the generic Iterable branch below because they are
# also Mapping instances. If they expose a ``tools`` collection, spread
# that collection into the normalized list.
collection_tools = getattr(tool_item, "tools", None) # type: ignore[reportUnknownArgumentType]
if isinstance(collection_tools, Iterable) and not isinstance(
collection_tools, (str, bytes, bytearray, Mapping)
):
normalized.extend(normalize_tools(list(collection_tools))) # type: ignore[reportUnknownArgumentType]
continue
# Tool-collection wrapper (e.g. FoundryToolbox): a non-tool, non-callable
# iterable. Flatten its contents so ``tools=[toolbox, my_func]`` works.
# Strings, mappings, and Pydantic BaseModel are excluded — BaseModel
# instances iterate over (field, value) tuples, not tools, so they
# should pass through as leaf tool specs (handled below).
if isinstance(tool_item, Iterable) and not isinstance(tool_item, (str, bytes, bytearray, Mapping, BaseModel)):
normalized.extend(normalize_tools(list(tool_item))) # type: ignore[reportUnknownArgumentType]
continue
normalized.append(tool_item) # type: ignore[reportUnknownArgumentType]
return normalized
@@ -1033,7 +1062,7 @@ def tool(
name: str | None = None,
description: str | None = None,
schema: type[BaseModel] | Mapping[str, Any] | None = None,
approval_mode: Literal["always_require", "never_require"] | None = None,
approval_mode: ApprovalMode | None = None,
kind: str | None = None,
max_invocations: int | None = None,
max_invocation_exceptions: int | None = None,
@@ -1049,7 +1078,7 @@ def tool(
name: str | None = None,
description: str | None = None,
schema: type[BaseModel] | Mapping[str, Any] | None = None,
approval_mode: Literal["always_require", "never_require"] | None = None,
approval_mode: ApprovalMode | None = None,
kind: str | None = None,
max_invocations: int | None = None,
max_invocation_exceptions: int | None = None,
@@ -1064,7 +1093,7 @@ def tool(
name: str | None = None,
description: str | None = None,
schema: type[BaseModel] | Mapping[str, Any] | None = None,
approval_mode: Literal["always_require", "never_require"] | None = None,
approval_mode: ApprovalMode | None = None,
kind: str | None = None,
max_invocations: int | None = None,
max_invocation_exceptions: int | None = None,
+53 -1
View File
@@ -351,6 +351,8 @@ ContentType = Literal[
"image_generation_tool_result",
"mcp_server_tool_call",
"mcp_server_tool_result",
"search_tool_call",
"search_tool_result",
"shell_tool_call",
"shell_tool_result",
"shell_command_output",
@@ -864,6 +866,56 @@ class Content:
raw_representation=raw_representation,
)
@classmethod
def from_search_tool_call(
    cls: type[ContentT],
    call_id: str,
    *,
    tool_name: str,
    arguments: str | Mapping[str, Any] | None = None,
    status: str | None = None,
    annotations: Sequence[Annotation] | None = None,
    additional_properties: MutableMapping[str, Any] | None = None,
    raw_representation: Any = None,
) -> ContentT:
    """Build a content item of type ``search_tool_call``.

    Every argument is forwarded unchanged to the class constructor under the
    keyword of the same name; the content type is fixed to
    ``"search_tool_call"``.

    Args:
        call_id: Identifier of the tool call.

    Keyword Args:
        tool_name: Name of the search tool being called.
        arguments: Call arguments, either as a string or as a mapping.
        status: Optional status value.
        annotations: Optional annotations to attach.
        additional_properties: Optional extra properties to attach.
        raw_representation: Optional underlying provider object.

    Returns:
        The newly constructed content instance.
    """
    fields: dict[str, Any] = {
        "call_id": call_id,
        "tool_name": tool_name,
        "arguments": arguments,
        "status": status,
        "annotations": annotations,
        "additional_properties": additional_properties,
        "raw_representation": raw_representation,
    }
    return cls("search_tool_call", **fields)
@classmethod
def from_search_tool_result(
    cls: type[ContentT],
    call_id: str,
    *,
    tool_name: str,
    result: Any = None,
    items: Sequence[Content] | None = None,
    status: str | None = None,
    annotations: Sequence[Annotation] | None = None,
    additional_properties: MutableMapping[str, Any] | None = None,
    raw_representation: Any = None,
) -> ContentT:
    """Build a content item of type ``search_tool_result``.

    Arguments are forwarded to the class constructor under the keyword of the
    same name. ``items`` is copied into a new list when provided and stays
    ``None`` otherwise.

    Args:
        call_id: Identifier of the tool call this result belongs to.

    Keyword Args:
        tool_name: Name of the search tool that produced the result.
        result: Optional result payload.
        items: Optional sequence of content items.
        status: Optional status value.
        annotations: Optional annotations to attach.
        additional_properties: Optional extra properties to attach.
        raw_representation: Optional underlying provider object.

    Returns:
        The newly constructed content instance.
    """
    # Materialize the caller's sequence so the content owns its own list.
    copied_items = None if items is None else list(items)
    return cls(
        "search_tool_result",
        call_id=call_id,
        tool_name=tool_name,
        result=result,
        items=copied_items,
        status=status,
        annotations=annotations,
        additional_properties=additional_properties,
        raw_representation=raw_representation,
    )
@classmethod
def from_usage(
cls: type[ContentT],
@@ -1478,7 +1530,7 @@ class Content:
return span.lower() == top_level_media_type.lower()
def parse_arguments(self) -> dict[str, Any | None] | None:
"""Parse arguments from function_call or mcp_server_tool_call content.
"""Parse arguments from function_call, mcp_server_tool_call, or search_tool_call content.
If arguments cannot be parsed as JSON or the result is not a dict,
they are returned as a dictionary with a single key "raw".
@@ -20,6 +20,7 @@ _IMPORTS: dict[str, tuple[str, str]] = {
"FoundryEmbeddingOptions": ("agent_framework_foundry", "agent-framework-foundry"),
"FoundryEmbeddingSettings": ("agent_framework_foundry", "agent-framework-foundry"),
"FoundryEvals": ("agent_framework_foundry", "agent-framework-foundry"),
"FoundryHostedToolType": ("agent_framework_foundry", "agent-framework-foundry"),
"FoundryMemoryProvider": ("agent_framework_foundry", "agent-framework-foundry"),
"FoundryLocalChatOptions": ("agent_framework_foundry_local", "agent-framework-foundry-local"),
"FoundryLocalClient": ("agent_framework_foundry_local", "agent-framework-foundry-local"),
@@ -31,6 +32,9 @@ _IMPORTS: dict[str, tuple[str, str]] = {
"RawFoundryEmbeddingClient": ("agent_framework_foundry", "agent-framework-foundry"),
"evaluate_foundry_target": ("agent_framework_foundry", "agent-framework-foundry"),
"evaluate_traces": ("agent_framework_foundry", "agent-framework-foundry"),
"get_toolbox_tool_name": ("agent_framework_foundry", "agent-framework-foundry"),
"get_toolbox_tool_type": ("agent_framework_foundry", "agent-framework-foundry"),
"select_toolbox_tools": ("agent_framework_foundry", "agent-framework-foundry"),
}
@@ -12,6 +12,7 @@ from agent_framework_foundry import (
FoundryEmbeddingOptions,
FoundryEmbeddingSettings,
FoundryEvals,
FoundryHostedToolType,
FoundryMemoryProvider,
RawFoundryAgent,
RawFoundryAgentChatClient,
@@ -19,6 +20,9 @@ from agent_framework_foundry import (
RawFoundryEmbeddingClient,
evaluate_foundry_target,
evaluate_traces,
get_toolbox_tool_name,
get_toolbox_tool_type,
select_toolbox_tools,
)
from agent_framework_foundry_local import (
FoundryLocalChatOptions,
@@ -35,6 +39,7 @@ __all__ = [
"FoundryEmbeddingOptions",
"FoundryEmbeddingSettings",
"FoundryEvals",
"FoundryHostedToolType",
"FoundryLocalChatOptions",
"FoundryLocalClient",
"FoundryLocalSettings",
@@ -46,4 +51,7 @@ __all__ = [
"RawFoundryEmbeddingClient",
"evaluate_foundry_target",
"evaluate_traces",
"get_toolbox_tool_name",
"get_toolbox_tool_type",
"select_toolbox_tools",
]
@@ -1144,3 +1144,160 @@ def test_parse_annotation_with_annotated_and_literal():
# endregion
# region normalize_tools flattening of tool-collection wrappers
def _make_flatten_function_tool(name: str) -> FunctionTool:
    """Return a ``FunctionTool`` called ``name`` that wraps an identity function on ints."""

    def _impl(x: int) -> int:
        return x

    # Apply the ``tool`` decorator factory explicitly instead of using ``@`` syntax.
    return tool(name=name, description=f"{name} tool")(_impl)  # type: ignore[return-value]
def test_normalize_tools_flattens_tool_collection_wrapper() -> None:
    """An iterable wrapper placed in the tools list is replaced by the tools it yields."""
    from agent_framework._tools import normalize_tools

    class ToolBundle:
        """Bare-bones iterable container standing in for a wrapper such as FoundryToolbox."""

        def __init__(self, tools: list[FunctionTool]) -> None:
            self._tools = tools

        def __iter__(self):
            return iter(self._tools)

    expected = [_make_flatten_function_tool("inner_a"), _make_flatten_function_tool("inner_b")]

    normalized = normalize_tools([ToolBundle(list(expected))])

    assert len(normalized) == 2
    # Identity, not equality: the very same tool objects must come back, in order.
    assert all(actual is wanted for actual, wanted in zip(normalized, expected))
def test_normalize_tools_combines_bundle_with_individual_tools() -> None:
    """``tools=[bundle, my_func]`` yields the bundle's tools followed by the individual tool."""
    from agent_framework._tools import normalize_tools

    class ToolBundle:
        def __init__(self, tools: list[FunctionTool]) -> None:
            self._tools = tools

        def __iter__(self):
            return iter(self._tools)

    from_bundle = _make_flatten_function_tool("bundled")
    on_its_own = _make_flatten_function_tool("standalone")

    normalized = normalize_tools([ToolBundle([from_bundle]), on_its_own])

    assert len(normalized) == 2
    first, second = normalized
    assert first is from_bundle
    assert second is on_its_own
def test_normalize_tools_flattens_nested_bundles() -> None:
    """A bundle wrapped in another bundle still collapses down to its single tool."""
    from agent_framework._tools import normalize_tools

    class ToolBundle:
        def __init__(self, tools: list[Any]) -> None:
            self._tools = tools

        def __iter__(self):
            return iter(self._tools)

    deepest_tool = _make_flatten_function_tool("deep")
    inner_bundle = ToolBundle([deepest_tool])
    outer_bundle = ToolBundle([inner_bundle])

    normalized = normalize_tools([outer_bundle])

    assert len(normalized) == 1
    assert normalized[0] is deepest_tool
def test_normalize_tools_bundle_only_form() -> None:
    """``tools=bundle`` (a bundle passed without an enclosing list) is flattened too."""
    from agent_framework._tools import normalize_tools

    class ToolBundle:
        def __init__(self, tools: list[FunctionTool]) -> None:
            self._tools = tools

        def __iter__(self):
            return iter(self._tools)

    expected = [_make_flatten_function_tool("a"), _make_flatten_function_tool("b")]

    normalized = normalize_tools(ToolBundle(list(expected)))  # type: ignore[arg-type]

    assert len(normalized) == 2
    assert all(actual is wanted for actual, wanted in zip(normalized, expected))
def test_normalize_tools_does_not_flatten_known_tool_types() -> None:
    """FunctionTool and dict entries pass through as-is; a bare callable is wrapped."""
    from agent_framework._tools import normalize_tools

    def plain_callable(x: int) -> int:
        return x

    existing_tool = _make_flatten_function_tool("ft")
    hosted_spec: dict[str, Any] = {"type": "code_interpreter", "container": {"type": "auto"}}

    normalized = normalize_tools([existing_tool, hosted_spec, plain_callable])

    assert len(normalized) == 3
    passed_tool, passed_spec, wrapped_callable = normalized
    assert passed_tool is existing_tool
    assert passed_spec is hosted_spec
    # The plain function does not come back as-is: it is returned as a FunctionTool.
    assert isinstance(wrapped_callable, FunctionTool)
def test_normalize_tools_flattens_mapping_like_toolbox_with_tools_attr() -> None:
    """A Mapping that also exposes a ``.tools`` attribute is spread into its tools."""
    from collections.abc import Mapping as MappingABC

    from agent_framework._tools import normalize_tools

    class ToolBundleMapping(MappingABC[str, Any]):
        """Mapping over toolbox metadata that additionally exposes ``tools`` as an attribute."""

        def __init__(self, tools: list[FunctionTool]) -> None:
            self.tools = tools
            self._data = {"name": "research_tools", "version": "v1", "tools": tools}

        def __len__(self) -> int:
            return len(self._data)

        def __iter__(self):
            return iter(self._data)

        def __getitem__(self, key: str) -> Any:
            return self._data[key]

    from_toolbox = _make_flatten_function_tool("bundled")
    on_its_own = _make_flatten_function_tool("standalone")

    normalized = normalize_tools([ToolBundleMapping([from_toolbox]), on_its_own])

    assert len(normalized) == 2
    first, second = normalized
    assert first is from_toolbox
    assert second is on_its_own
# endregion
+63
View File
@@ -1,3 +1,66 @@
# Agent Framework Foundry
This package contains the Microsoft Foundry integrations for Microsoft Agent Framework, including Foundry chat clients, preconfigured Foundry agents, Foundry embedding clients, and Foundry memory providers.
## Toolboxes
A *toolbox* is a named, versioned bundle of hosted tool configurations — code interpreter, file search, image generation, MCP, web search, and so on — stored inside a Microsoft Foundry project. Toolboxes let you manage tool configuration once and reuse it across agents.
### Authoring a toolbox
Toolboxes can be authored in two ways:
- **Foundry portal** — create and version toolboxes through the UI without touching code.
- **Programmatically** — use the [`azure-ai-projects`](https://pypi.org/project/azure-ai-projects/) SDK to create, update, and version toolboxes from Python.
> Toolbox authoring APIs (`ToolboxVersionObject`, `ToolboxObject`, `project_client.beta.toolboxes.*`) require `azure-ai-projects>=2.1.0`. Earlier versions can only consume toolboxes that already exist.
### Using toolboxes with `FoundryAgent`
For hosted `FoundryAgent`, the toolbox must already be attached to the agent in the Microsoft Foundry project. Once attached, the agent invokes its toolbox tools transparently — no client-side wiring required — and you interact with the agent the same way you would with any other tool-equipped Foundry agent.
### Using toolboxes with `FoundryChatClient`
There are two patterns for wiring a toolbox into a `FoundryChatClient`-backed agent.
**1. Fetch, optionally filter, and pass the tools directly**
Load the toolbox from the Microsoft Foundry project, optionally select a subset of its tools, and hand them to an `Agent` alongside any other tools you own:
```python
from agent_framework import Agent
from agent_framework.foundry import FoundryChatClient, select_toolbox_tools
client = FoundryChatClient(...)
toolbox = await client.get_toolbox("my-toolbox", version="3")
# Pass the whole toolbox:
agent = Agent(client=client, tools=toolbox)
# Or filter to a subset first:
selected = select_toolbox_tools(toolbox, include_types=["code_interpreter", "mcp"])
agent = Agent(client=client, tools=selected)
```
See [`foundry_chat_client_with_toolbox.py`](../../samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py) for a full example, including combining multiple toolboxes.
**2. Connect to the toolbox's MCP endpoint with `MCPStreamableHTTPTool`**
Each toolbox is reachable as an MCP server. Instead of fetching and fanning out its individual tool definitions, you can point a Microsoft Agent Framework (MAF) `MCPStreamableHTTPTool` at the toolbox's MCP endpoint — the agent then discovers and calls its tools over MCP at runtime:
```python
from agent_framework import Agent, MCPStreamableHTTPTool
from agent_framework.foundry import FoundryChatClient
async with Agent(
client=FoundryChatClient(...),
instructions="You are a helpful assistant. Use the toolbox tools when useful.",
tools=MCPStreamableHTTPTool(
name="my_toolbox",
description="Tools served by my Foundry toolbox",
url="https://<your-toolbox-mcp-endpoint>",
),
) as agent:
result = await agent.run("What tools are available?")
print(result.text)
```
@@ -16,6 +16,7 @@ from ._foundry_evals import (
evaluate_traces,
)
from ._memory_provider import FoundryMemoryProvider
from ._tools import FoundryHostedToolType, get_toolbox_tool_name, get_toolbox_tool_type, select_toolbox_tools
try:
__version__ = importlib.metadata.version(__name__)
@@ -30,6 +31,7 @@ __all__ = [
"FoundryEmbeddingOptions",
"FoundryEmbeddingSettings",
"FoundryEvals",
"FoundryHostedToolType",
"FoundryMemoryProvider",
"RawFoundryAgent",
"RawFoundryAgentChatClient",
@@ -38,4 +40,7 @@ __all__ = [
"__version__",
"evaluate_foundry_target",
"evaluate_traces",
"get_toolbox_tool_name",
"get_toolbox_tool_type",
"select_toolbox_tools",
]
@@ -34,6 +34,8 @@ from azure.ai.projects.aio import AIProjectClient
from azure.core.credentials import TokenCredential
from azure.core.credentials_async import AsyncTokenCredential
from ._tools import sanitize_foundry_response_tool
if sys.version_info >= (3, 13):
from typing import TypeVar # type: ignore # pragma: no cover
else:
@@ -307,6 +309,20 @@ class RawFoundryAgentChatClient( # type: ignore[misc]
"""Skip model check — model is configured on the Foundry agent."""
pass
@override
def _prepare_tools_for_openai(
    self,
    tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None,
) -> list[Any]:
    """Prepare tools for a Foundry agent Responses API call.

    The parent class prepares the tools first; each prepared entry is then
    passed through ``sanitize_foundry_response_tool`` so that hosted-tool
    payloads carrying extra read-model fields (as toolbox-fetched tools can)
    are cleaned up before the request is sent.

    Args:
        tools: A single tool or callable, a sequence of them, or ``None``.

    Returns:
        The prepared and sanitized tool payloads.
    """
    prepared = super()._prepare_tools_for_openai(tools)
    return list(map(sanitize_foundry_response_tool, prepared))
def _prepare_messages_for_azure_ai(self, messages: Sequence[Message]) -> tuple[list[Message], str | None]:
"""Extract system/developer messages as instructions for Azure AI.
@@ -16,6 +16,7 @@ from agent_framework import (
load_settings,
)
from agent_framework._compaction import CompactionStrategy, TokenizerProtocol
from agent_framework._feature_stage import ExperimentalFeature, experimental
from agent_framework.observability import ChatTelemetryLayer
from agent_framework_openai._chat_client import OpenAIChatOptions, RawOpenAIChatClient
from azure.ai.projects.aio import AIProjectClient
@@ -32,6 +33,8 @@ from azure.ai.projects.models import MCPTool as FoundryMCPTool
from azure.core.credentials import TokenCredential
from azure.core.credentials_async import AsyncTokenCredential
from ._tools import fetch_toolbox, sanitize_foundry_response_tool
if sys.version_info >= (3, 13):
from typing import TypeVar # type: ignore # pragma: no cover
else:
@@ -46,7 +49,8 @@ else:
from typing_extensions import TypedDict # type: ignore # pragma: no cover
if TYPE_CHECKING:
from agent_framework import ChatAndFunctionMiddlewareTypes
from agent_framework import ChatAndFunctionMiddlewareTypes, ToolTypes
from azure.ai.projects.models import ToolboxVersionObject
logger: logging.Logger = logging.getLogger("agent_framework.foundry")
@@ -218,6 +222,21 @@ class RawFoundryChatClient( # type: ignore[misc]
raise ValueError("model must be a non-empty string")
options["model"] = self.model
@override
def _prepare_tools_for_openai(
    self,
    tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None,
) -> list[Any]:
    """Prepare tools for a Foundry Responses API call.

    Tools read from a Foundry toolbox can carry extra fields (``name``, for
    example) that the toolbox API accepts but the Responses API rejects. The
    parent class prepares the tools, and each prepared entry is then run
    through ``sanitize_foundry_response_tool`` before being returned.

    Args:
        tools: A single tool or callable, a sequence of them, or ``None``.

    Returns:
        The prepared and sanitized tool payloads.
    """
    prepared = super()._prepare_tools_for_openai(tools)
    return list(map(sanitize_foundry_response_tool, prepared))
async def configure_azure_monitor(
self,
enable_sensitive_data: bool = False,
@@ -460,6 +479,37 @@ class RawFoundryChatClient( # type: ignore[misc]
# endregion
# region Toolbox methods (instance methods — these hit the network)
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
async def get_toolbox(
    self,
    name: str,
    *,
    version: str | None = None,
) -> ToolboxVersionObject:
    """Fetch a Foundry toolbox version by toolbox name.

    Delegates to ``fetch_toolbox`` using this client's ``project_client``.
    Without ``version`` the toolbox's current default version is resolved
    first (two requests); with ``version`` that version is fetched directly
    (one request).

    Args:
        name: The name of the toolbox.

    Keyword Args:
        version: Optional immutable version identifier to pin to.

    Returns:
        A ``ToolboxVersionObject``. Hand its tools to an agent with
        ``Agent(tools=toolbox.tools)``, or pass the toolbox object itself
        (``Agent(tools=toolbox)``).

    Raises:
        azure.core.exceptions.ResourceNotFoundError: If the toolbox or
            the requested version does not exist.
    """
    toolbox = await fetch_toolbox(self.project_client, name, version)
    return toolbox
class FoundryChatClient( # type: ignore[misc]
FunctionInvocationLayer[FoundryChatOptionsT],
@@ -0,0 +1,166 @@
# Copyright (c) Microsoft. All rights reserved.
"""Shared tool helpers for Foundry chat clients.
Includes:
* *Toolbox* helpers — a *toolbox* is a named, versioned bundle of tool
definitions stored in an Azure AI Foundry project.
* Responses-API payload sanitization for Foundry hosted tools.
"""
from __future__ import annotations
from collections.abc import Callable, Collection, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Literal, TypeAlias, cast
from agent_framework._feature_stage import ExperimentalFeature, experimental
from azure.ai.projects.models import MCPTool as FoundryMCPTool
if TYPE_CHECKING:
from azure.ai.projects.aio import AIProjectClient
from azure.ai.projects.models import Tool, ToolboxVersionObject
FoundryHostedToolType: TypeAlias = (
Literal[
"code_interpreter",
"file_search",
"image_generation",
"mcp",
"web_search",
]
| str
)
ToolboxToolSelectionInput: TypeAlias = "ToolboxVersionObject | Sequence[Tool | dict[str, Any]]"
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
async def fetch_toolbox(
    project_client: AIProjectClient,
    name: str,
    version: str | None = None,
) -> ToolboxVersionObject:
    """Fetch one version of a toolbox through an ``AIProjectClient``.

    Args:
        project_client: Client whose ``beta.toolboxes`` operations are used.
        name: The name of the toolbox.
        version: Version to fetch. When ``None``, the toolbox is looked up
            first and its ``default_version`` is used.

    Returns:
        Whatever ``beta.toolboxes.get_version(name, version)`` returns.
    """
    # Pinned version: a single request is enough.
    if version is not None:
        return await project_client.beta.toolboxes.get_version(name, version)

    # No version given: one request for the default-version pointer, one for the tools.
    toolbox_handle = await project_client.beta.toolboxes.get(name)
    default_version = toolbox_handle.default_version
    return await project_client.beta.toolboxes.get_version(name, default_version)
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
def get_toolbox_tool_name(tool: Tool | dict[str, Any]) -> str | None:
    """Return the best-effort display/selection name for a toolbox tool.

    Selection precedence (the first non-empty value wins):

    1. MCP ``server_label``
    2. Generic tool ``name``
    3. Tool ``type``

    Each field is looked up as a mapping key when ``tool`` is any ``Mapping``
    (not only a plain ``dict``) and, if that yields nothing, as an attribute.
    Checking keys on every mapping keeps extra keys visible on Mapping-like
    tool objects that do not expose them as attributes.

    Args:
        tool: A tool object or a mapping describing a tool.

    Returns:
        The selected value converted with ``str``, or ``None`` when none of
        the three fields holds a truthy value.
    """
    is_mapping = isinstance(tool, Mapping)
    for field in ("server_label", "name", "type"):
        value = tool.get(field) if is_mapping else None
        if not value:
            # Attribute fallback covers plain objects and declared model fields.
            value = getattr(tool, field, None)
        if value:
            return str(value)
    return None
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
def get_toolbox_tool_type(tool: Tool | dict[str, Any]) -> str | None:
    """Return the raw tool ``type`` if present.

    ``type`` is read as a mapping key when ``tool`` is any ``Mapping`` (not
    only a plain ``dict``), falling back to attribute access when the key is
    missing or ``None``.

    Args:
        tool: A tool object or a mapping describing a tool.

    Returns:
        The ``type`` value converted with ``str``, or ``None`` when absent.
    """
    tool_type = tool.get("type") if isinstance(tool, Mapping) else None
    if tool_type is None:
        tool_type = getattr(tool, "type", None)
    return str(tool_type) if tool_type is not None else None
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
def select_toolbox_tools(
    tools: ToolboxToolSelectionInput,
    *,
    include_names: Collection[str] | None = None,
    exclude_names: Collection[str] | None = None,
    include_types: Collection[FoundryHostedToolType] | None = None,
    exclude_types: Collection[FoundryHostedToolType] | None = None,
    predicate: Callable[[Tool | dict[str, Any]], bool] | None = None,
) -> list[Tool | dict[str, Any]]:
    """Return the toolbox tools that pass every supplied filter.

    Names come from ``get_toolbox_tool_name`` (``server_label``, then
    ``name``, then ``type``) and types from ``get_toolbox_tool_type``. A
    filter left as ``None`` is not applied; the filters that are given must
    all accept a tool for it to be kept. Input order is preserved.

    Args:
        tools: Either a sequence of tools, or an object whose ``tools``
            attribute holds that sequence.

    Keyword Args:
        include_names: Keep only tools whose name is in this collection.
        exclude_names: Drop tools whose name is in this collection.
        include_types: Keep only tools whose type is in this collection.
        exclude_types: Drop tools whose type is in this collection.
        predicate: Extra callable; a tool is kept only if it returns a truthy value.

    Returns:
        A new list with the selected tools.
    """
    candidates: Sequence[Tool | dict[str, Any]] = (
        tools if isinstance(tools, Sequence) else cast("Sequence[Tool | dict[str, Any]]", tools.tools)
    )

    def _as_str_set(values: Collection[Any] | None) -> set[str] | None:
        # ``None`` means "filter not requested" and must stay distinguishable from an empty set.
        return None if values is None else {str(value) for value in values}

    wanted_names = _as_str_set(include_names)
    unwanted_names = _as_str_set(exclude_names)
    wanted_types = _as_str_set(include_types)
    unwanted_types = _as_str_set(exclude_types)

    def _keep(candidate: Tool | dict[str, Any]) -> bool:
        candidate_name = get_toolbox_tool_name(candidate)
        candidate_type = get_toolbox_tool_type(candidate)
        if wanted_names is not None and candidate_name not in wanted_names:
            return False
        if unwanted_names is not None and candidate_name in unwanted_names:
            return False
        if wanted_types is not None and candidate_type not in wanted_types:
            return False
        if unwanted_types is not None and candidate_type in unwanted_types:
            return False
        return predicate is None or bool(predicate(candidate))

    return [candidate for candidate in candidates if _keep(candidate)]
@experimental(feature_id=ExperimentalFeature.TOOLBOXES)
def sanitize_foundry_response_tool(tool_item: Any) -> Any:
    """Return a tool payload with toolbox decoration fields removed where needed.

    Toolbox reads can return hosted tool objects that carry extra read-model
    fields such as top-level ``name`` and ``description``; the Responses API
    rejects at least ``name`` (``Unknown parameter: 'tools[0].name'``).

    Two kinds of input are rewritten into a plain ``dict`` copy without the
    ``name`` and ``description`` keys:

    * any ``FoundryMCPTool`` instance;
    * any mapping that has a ``type`` key whose value is neither
      ``"function"`` nor ``"custom"``.

    Everything else, including function and custom tool mappings, is returned
    unchanged.

    Args:
        tool_item: A prepared tool payload of any kind.

    Returns:
        The sanitized ``dict`` copy, or ``tool_item`` itself when no
        sanitization applies.
    """
    strip_decorations = isinstance(tool_item, FoundryMCPTool)
    if not strip_decorations and isinstance(tool_item, Mapping):
        payload = cast("Mapping[str, Any]", tool_item)
        strip_decorations = "type" in payload and payload.get("type") not in {"function", "custom"}

    if not strip_decorations:
        return cast(Any, tool_item)

    # Copy first so the caller's object is never mutated.
    cleaned: dict[str, Any] = dict(cast("Mapping[str, Any]", tool_item))
    for decoration_field in ("name", "description"):
        cleaned.pop(decoration_field, None)
    return cleaned
+1 -1
View File
@@ -26,7 +26,7 @@ dependencies = [
"agent-framework-core>=1.0.1,<2",
"agent-framework-openai>=1.0.1,<2",
"azure-ai-inference>=1.0.0b9,<1.0.0b10",
"azure-ai-projects>=2.0.0,<3.0",
"azure-ai-projects>=2.1.0,<3.0",
]
[tool.uv]
@@ -15,6 +15,7 @@ from agent_framework import ChatResponse, Content, Message, SupportsChatGetRespo
from agent_framework._telemetry import AGENT_FRAMEWORK_USER_AGENT
from agent_framework.exceptions import ChatClientException, ChatClientInvalidRequestException
from agent_framework_openai import OpenAIContentFilterException
from azure.ai.projects.models import MCPTool as FoundryMCPTool
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import AzureCliCredential
from openai import BadRequestError
@@ -608,6 +609,82 @@ def test_get_mcp_tool_with_project_connection_id() -> None:
assert tool_config["server_label"] == "Docs_MCP"
def test_prepare_tools_for_openai_strips_extraneous_name_from_foundry_mcp_tool() -> None:
    """An MCP tool carrying an extra ``name`` key is prepared without that key."""
    mcp_tool = FoundryMCPTool(
        server_label="githubmcp",
        server_url="https://api.githubcopilot.com/mcp",
    )
    mcp_tool["project_connection_id"] = "githubmcp"
    mcp_tool["name"] = "githubmcp"

    project_client = MagicMock()
    project_client.get_openai_client.return_value = _make_mock_openai_client()
    client = FoundryChatClient(project_client=project_client, model="test-model")

    response_tools = client._prepare_tools_for_openai([mcp_tool])

    assert len(response_tools) == 1
    prepared = response_tools[0]
    # The fields needed by the request survive; only the decoration is gone.
    expected_fields = {"type": "mcp", "server_label": "githubmcp", "project_connection_id": "githubmcp"}
    for key, expected in expected_fields.items():
        assert prepared[key] == expected
    assert "name" not in prepared
def test_prepare_tools_for_openai_strips_read_model_fields_from_toolbox_code_interpreter() -> None:
    """``name``/``description`` are dropped from a code interpreter dict; ``type`` and ``container`` are kept."""
    mock_project = MagicMock()
    mock_project.get_openai_client.return_value = _make_mock_openai_client()
    chat_client = FoundryChatClient(project_client=mock_project, model="test-model")

    code_interpreter = {
        "type": "code_interpreter",
        "name": "code_interpreter_t6bbtm",
        "description": "Toolbox read model description",
        "container": {"file_ids": [], "type": "auto"},
    }

    prepared_tools = chat_client._prepare_tools_for_openai([code_interpreter])

    assert len(prepared_tools) == 1
    payload = prepared_tools[0]
    assert payload["type"] == "code_interpreter"
    assert payload["container"] == {"file_ids": [], "type": "auto"}
    for removed_key in ("name", "description"):
        assert removed_key not in payload
def test_prepare_tools_for_openai_strips_name_from_non_function_hosted_tool_dicts() -> None:
    """``file_search`` and ``web_search`` dicts lose top-level ``name``/``description`` when prepared."""
    mock_project = MagicMock()
    mock_project.get_openai_client.return_value = _make_mock_openai_client()
    chat_client = FoundryChatClient(project_client=mock_project, model="test-model")

    hosted_tools = [
        {
            "type": "file_search",
            "name": "file_search_tool_123",
            "description": "toolbox decoration",
            "vector_store_ids": ["vs_123"],
        },
        {
            "type": "web_search",
            "name": "web_search_tool_456",
            "description": "toolbox decoration",
        },
    ]

    prepared_tools = chat_client._prepare_tools_for_openai(hosted_tools)

    assert len(prepared_tools) == 2
    file_search_payload = prepared_tools[0]
    web_search_payload = prepared_tools[1]

    assert file_search_payload["type"] == "file_search"
    assert file_search_payload["vector_store_ids"] == ["vs_123"]
    assert "name" not in file_search_payload
    assert "description" not in file_search_payload

    assert web_search_payload["type"] == "web_search"
    assert "name" not in web_search_payload
    assert "description" not in web_search_payload
@pytest.mark.flaky
@pytest.mark.integration
@skip_if_foundry_integration_tests_disabled
@@ -0,0 +1,435 @@
# Copyright (c) Microsoft. All rights reserved.
"""Unit tests for toolbox helpers on FoundryChatClient.
Return types are the raw azure-ai-projects SDK models (ToolboxVersionObject,
ToolboxObject) — no custom wrapper. Tests verify the chat-client get path and
tool-selection ergonomics.
"""
from __future__ import annotations
import datetime as dt
import os
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import pytest
try:
from azure.ai.projects.models import (
AutoCodeInterpreterToolParam,
CodeInterpreterTool,
Tool,
ToolboxObject,
ToolboxVersionObject,
)
except ImportError:
pytest.skip(
"Toolbox types require azure-ai-projects>=2.1.0 (unreleased).",
allow_module_level=True,
)
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import AzureCliCredential
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
class _AsyncIter:
"""Minimal async-iterable for mocking ``AsyncItemPaged`` in tests."""
def __init__(self, items: list[Any]) -> None:
self._items = items
def __aiter__(self) -> _AsyncIter:
self._iter = iter(self._items)
return self
async def __anext__(self) -> Any:
try:
return next(self._iter)
except StopIteration:
raise StopAsyncIteration from None
def _make_code_interpreter() -> CodeInterpreterTool:
    """Build a ``CodeInterpreterTool`` whose container is a default ``AutoCodeInterpreterToolParam``."""
    auto_container = AutoCodeInterpreterToolParam()
    return CodeInterpreterTool(container=auto_container)
def _make_version_object(
    *,
    name: str = "research_tools",
    version: str = "v1",
    tools: list[Tool] | None = None,
    description: str | None = None,
) -> ToolboxVersionObject:
    """Build a ``ToolboxVersionObject`` fixture.

    When ``tools`` is ``None`` the toolbox contains a single code interpreter tool;
    an explicitly passed list (including an empty one) is used as-is.
    """
    resolved_tools = [_make_code_interpreter()] if tools is None else tools
    fixed_created_at = dt.datetime(2026, 4, 10, tzinfo=dt.timezone.utc)
    return ToolboxVersionObject(
        id=f"tbv_{name}_{version}",
        name=name,
        version=version,
        metadata={},
        created_at=fixed_created_at,
        tools=resolved_tools,
        description=description,
    )
def _make_mock_foundry_client(*, project_client: MagicMock) -> Any:
    """Return a ``FoundryChatClient`` for ``project_client`` with ``get_openai_client`` stubbed out.

    ``project_client.get_openai_client`` is replaced by a mock that returns another mock.
    """
    from agent_framework_foundry import FoundryChatClient

    openai_client_factory = MagicMock(return_value=MagicMock())
    project_client.get_openai_client = openai_client_factory
    foundry_client = FoundryChatClient(project_client=project_client, model="test-model")
    return foundry_client
# --------------------------------------------------------------------------- #
# get_toolbox — explicit version path #
# --------------------------------------------------------------------------- #
async def test_get_toolbox_with_explicit_version_makes_single_request() -> None:
    """With an explicit ``version``, only ``get_version`` is awaited; ``get`` is never called."""
    expected_version = _make_version_object(name="research_tools", version="v3")

    project_client = MagicMock()
    toolboxes_api = project_client.beta.toolboxes
    toolboxes_api.get_version = AsyncMock(return_value=expected_version)
    # Any call to ``get`` fails the test loudly.
    toolboxes_api.get = AsyncMock(
        side_effect=AssertionError("get() must not be called when version is explicit")
    )

    foundry_client = _make_mock_foundry_client(project_client=project_client)
    fetched = await foundry_client.get_toolbox("research_tools", version="v3")

    assert isinstance(fetched, ToolboxVersionObject)
    assert fetched.name == "research_tools"
    assert fetched.version == "v3"
    toolboxes_api.get_version.assert_awaited_once_with("research_tools", "v3")
    toolboxes_api.get.assert_not_called()
# --------------------------------------------------------------------------- #
# get_toolbox — default-version path + error + passthrough + smoke #
# --------------------------------------------------------------------------- #
async def test_get_toolbox_default_version_resolves_then_fetches() -> None:
    """Without ``version``, ``get`` supplies ``default_version`` and ``get_version`` fetches it."""
    toolbox_handle = ToolboxObject(id="tb_1", name="research_tools", default_version="v5")
    default_version_obj = _make_version_object(name="research_tools", version="v5")

    project_client = MagicMock()
    toolboxes_api = project_client.beta.toolboxes
    toolboxes_api.get = AsyncMock(return_value=toolbox_handle)
    toolboxes_api.get_version = AsyncMock(return_value=default_version_obj)

    foundry_client = _make_mock_foundry_client(project_client=project_client)
    fetched = await foundry_client.get_toolbox("research_tools")

    assert fetched.version == "v5"
    toolboxes_api.get.assert_awaited_once_with("research_tools")
    toolboxes_api.get_version.assert_awaited_once_with("research_tools", "v5")
async def test_get_toolbox_propagates_resource_not_found() -> None:
    """A ``ResourceNotFoundError`` raised by ``toolboxes.get`` reaches the caller of ``get_toolbox``."""
    not_found = ResourceNotFoundError("no such toolbox")
    project_client = MagicMock()
    project_client.beta.toolboxes.get = AsyncMock(side_effect=not_found)

    foundry_client = _make_mock_foundry_client(project_client=project_client)

    with pytest.raises(ResourceNotFoundError):
        await foundry_client.get_toolbox("missing_toolbox")
async def test_get_toolbox_tool_passthrough_preserves_heterogeneous_types() -> None:
    """Tools of different ``Tool`` subclasses come back from ``get_toolbox`` unchanged.

    In particular the MCP tool keeps its ``project_connection_id`` entry.
    """
    from azure.ai.projects.models import MCPTool as FoundryMCPTool

    connected_mcp = FoundryMCPTool(
        server_label="github_oauth",
        server_url="https://api.githubcopilot.com/mcp",
    )
    connected_mcp["project_connection_id"] = "conn_abc"

    mixed_version = _make_version_object(
        name="mixed",
        version="v1",
        tools=[_make_code_interpreter(), connected_mcp],
    )
    project_client = MagicMock()
    project_client.beta.toolboxes.get_version = AsyncMock(return_value=mixed_version)

    foundry_client = _make_mock_foundry_client(project_client=project_client)
    fetched = await foundry_client.get_toolbox("mixed", version="v1")

    assert len(fetched.tools) == 2
    first_tool, second_tool = fetched.tools[0], fetched.tools[1]
    assert isinstance(first_tool, CodeInterpreterTool)
    assert isinstance(second_tool, FoundryMCPTool)
    assert second_tool["project_connection_id"] == "conn_abc"
async def test_toolbox_tools_can_be_passed_to_agent() -> None:
    """Smoke test: ``toolbox.tools`` is accepted directly as ``Agent(tools=...)``."""
    from agent_framework import Agent

    single_tool_version = _make_version_object(
        name="research_tools",
        version="v1",
        tools=[_make_code_interpreter()],
    )
    project_client = MagicMock()
    project_client.beta.toolboxes.get_version = AsyncMock(return_value=single_tool_version)
    foundry_client = _make_mock_foundry_client(project_client=project_client)

    fetched = await foundry_client.get_toolbox("research_tools", version="v1")
    agent = Agent(client=foundry_client, instructions="You are a test agent.", tools=fetched.tools)

    registered_tools = agent.default_options["tools"]
    assert len(registered_tools) == 1
    assert registered_tools[0]["type"] == "code_interpreter"
async def test_multiple_toolbox_tool_lists_can_be_combined_in_agent() -> None:
    """Two toolbox ``.tools`` lists passed as a nested list end up as one flat tool list on the agent."""
    from agent_framework import Agent

    project_client = MagicMock()
    project_client.get_openai_client = MagicMock(return_value=MagicMock())
    foundry_client = _make_mock_foundry_client(project_client=project_client)

    research_toolbox = _make_version_object(name="research_tools", version="v1", tools=[_make_code_interpreter()])
    other_toolbox = _make_version_object(name="some_other_tools", version="v3", tools=[_make_code_interpreter()])
    nested_tools = [research_toolbox.tools, other_toolbox.tools]

    agent = Agent(client=foundry_client, instructions="You are a test agent.", tools=nested_tools)

    registered_tools = agent.default_options["tools"]
    assert len(registered_tools) == 2
    for position in (0, 1):
        assert registered_tools[position]["type"] == "code_interpreter"
# --------------------------------------------------------------------------- #
# toolbox tool selection helpers #
# --------------------------------------------------------------------------- #
def test_get_toolbox_tool_name_prefers_server_label_then_name_then_type() -> None:
    """``get_toolbox_tool_name`` yields ``server_label``, else ``name``, else ``type`` for the cases below."""
    from azure.ai.projects.models import MCPTool as FoundryMCPTool

    from agent_framework_foundry import get_toolbox_tool_name

    # MCP tool: identified by its server label.
    labelled_mcp = FoundryMCPTool(
        server_label="githubmcp",
        server_url="https://api.githubcopilot.com/mcp",
    )
    assert get_toolbox_tool_name(labelled_mcp) == "githubmcp"

    # Dict with a ``name`` key: the name is used.
    assert get_toolbox_tool_name({"type": "code_interpreter", "name": "ci_tool"}) == "ci_tool"

    # Dict with only ``type``: the type is used.
    assert get_toolbox_tool_name({"type": "web_search"}) == "web_search"
def test_select_toolbox_tools_filters_by_names() -> None:
    """``include_names`` keeps only the named tools and returns the very same objects, in order."""
    from azure.ai.projects.models import MCPTool as FoundryMCPTool

    from agent_framework_foundry import select_toolbox_tools

    github_mcp = FoundryMCPTool(server_label="githubmcp", server_url="https://api.githubcopilot.com/mcp")
    python_runner = {"type": "code_interpreter", "name": "python_runner"}
    web_search = {"type": "web_search"}
    tools: list[Tool | dict[str, Any]] = [github_mcp, python_runner, web_search]

    kept = select_toolbox_tools(tools, include_names=["githubmcp", "python_runner"])

    assert len(kept) == 2
    assert kept[0] is tools[0]
    assert kept[1] is tools[1]
def test_select_toolbox_tools_filters_by_typed_tool_types() -> None:
    """``include_types`` keeps the tools whose ``type`` is listed and drops the rest."""
    from agent_framework_foundry import select_toolbox_tools

    tools: list[Tool | dict[str, Any]] = [
        {"type": "mcp", "server_label": "githubmcp"},
        {"type": "code_interpreter", "name": "python_runner"},
        {"type": "web_search"},
    ]
    wanted_types = ["mcp", "code_interpreter"]

    kept = select_toolbox_tools(tools, include_types=wanted_types)

    assert len(kept) == 2
    assert kept[0]["type"] == "mcp"
    assert kept[1]["type"] == "code_interpreter"
def test_select_toolbox_tools_accepts_toolbox_object_directly() -> None:
    """A ``ToolboxVersionObject`` can be passed to ``select_toolbox_tools`` in place of a tool list."""
    from agent_framework_foundry import select_toolbox_tools

    research_toolbox = _make_version_object(
        name="research_tools",
        version="v1",
        tools=[
            {"type": "mcp", "server_label": "githubmcp"},  # type: ignore[list-item]
            {"type": "code_interpreter", "name": "python_runner"},  # type: ignore[list-item]
            {"type": "web_search"},  # type: ignore[list-item]
        ],
    )

    kept = select_toolbox_tools(research_toolbox, include_types=["mcp", "code_interpreter"])

    assert len(kept) == 2
    kept_types = [kept[0]["type"], kept[1]["type"]]
    assert kept_types == ["mcp", "code_interpreter"]
async def test_fetched_toolbox_can_be_combined_with_function_tool() -> None:
    """A fetched toolbox object and a local ``@tool`` function can share one ``Agent(tools=[...])`` list."""
    from agent_framework import Agent, FunctionTool, tool

    @tool(name="local_lookup", description="A local helper tool")
    def local_lookup(query: str) -> str:
        return query

    single_tool_version = _make_version_object(
        name="research_tools",
        version="v1",
        tools=[_make_code_interpreter()],
    )
    project_client = MagicMock()
    project_client.beta.toolboxes.get_version = AsyncMock(return_value=single_tool_version)
    foundry_client = _make_mock_foundry_client(project_client=project_client)

    fetched = await foundry_client.get_toolbox("research_tools", version="v1")
    agent = Agent(
        client=foundry_client,
        instructions="You are a test agent.",
        tools=[fetched, local_lookup],
    )

    registered_tools = agent.default_options["tools"]
    assert len(registered_tools) == 2
    hosted_tool, function_tool = registered_tools[0], registered_tools[1]
    assert hosted_tool["type"] == "code_interpreter"
    assert isinstance(function_tool, FunctionTool)
    assert function_tool.name == "local_lookup"
def test_select_toolbox_tools_supports_excludes_and_predicate() -> None:
    """``exclude_names`` and ``predicate`` combine: only the MCP tool that is not excluded remains."""
    from agent_framework_foundry import select_toolbox_tools

    def is_mcp(candidate: Any) -> bool:
        return candidate.get("type") == "mcp"

    tools: list[Tool | dict[str, Any]] = [
        {"type": "mcp", "server_label": "githubmcp"},
        {"type": "mcp", "server_label": "learnmcp"},
        {"type": "web_search"},
    ]

    kept = select_toolbox_tools(tools, exclude_names=["learnmcp"], predicate=is_mcp)

    assert len(kept) == 1
    assert kept[0]["server_label"] == "githubmcp"
async def test_selected_toolbox_subset_can_be_combined_with_function_tool() -> None:
    """A subset picked with ``select_toolbox_tools`` plus a local ``@tool`` function flattens into one agent tool list."""
    from agent_framework import Agent, FunctionTool, tool

    from agent_framework_foundry import select_toolbox_tools

    @tool(name="local_lookup", description="A local helper tool")
    def local_lookup(query: str) -> str:
        return query

    three_tool_version = _make_version_object(
        name="research_tools",
        version="v1",
        tools=[
            {"type": "mcp", "server_label": "githubmcp"},  # type: ignore[list-item]
            {"type": "code_interpreter", "name": "python_runner"},  # type: ignore[list-item]
            {"type": "web_search"},  # type: ignore[list-item]
        ],
    )
    project_client = MagicMock()
    project_client.beta.toolboxes.get_version = AsyncMock(return_value=three_tool_version)
    foundry_client = _make_mock_foundry_client(project_client=project_client)

    fetched = await foundry_client.get_toolbox("research_tools", version="v1")
    subset = select_toolbox_tools(fetched, include_types=["mcp", "code_interpreter"])

    agent = Agent(
        client=foundry_client,
        instructions="You are a test agent.",
        tools=[subset, local_lookup],
    )

    registered_tools = agent.default_options["tools"]
    assert len(registered_tools) == 3
    assert registered_tools[0]["type"] == "mcp"
    assert registered_tools[1]["type"] == "code_interpreter"
    function_tool = registered_tools[2]
    assert isinstance(function_tool, FunctionTool)
    assert function_tool.name == "local_lookup"
# --------------------------------------------------------------------------- #
# Integration #
# --------------------------------------------------------------------------- #
# Skip marker for integration tests: applies when FOUNDRY_PROJECT_ENDPOINT is unset, empty,
# or equal to the "https://test-project.services.ai.azure.com/" value, or when FOUNDRY_MODEL
# is unset or empty. The environment is read once, when this module is imported.
skip_if_foundry_integration_tests_disabled = pytest.mark.skipif(
    os.getenv("FOUNDRY_PROJECT_ENDPOINT", "") in ("", "https://test-project.services.ai.azure.com/")
    or os.getenv("FOUNDRY_MODEL", "") == "",
    reason="No real FOUNDRY_PROJECT_ENDPOINT or FOUNDRY_MODEL provided; skipping integration tests.",
)
@pytest.mark.flaky
@pytest.mark.integration
@skip_if_foundry_integration_tests_disabled
async def test_integration_get_toolbox_round_trip_against_real_project() -> None:
    """Create a toolbox via the raw SDK, fetch via FoundryChatClient, then delete.

    Self-contained to avoid depending on toolboxes that may be cleaned up
    externally. Exercises both the default-version resolution path
    (``get`` + ``get_version``) and the explicit-version path.

    Every assertion made after the toolbox has been created runs inside the
    ``try`` block, so the ``finally`` clause deletes the toolbox even when an
    assertion fails.
    """
    from uuid import uuid4

    from agent_framework import Agent

    from agent_framework_foundry import FoundryChatClient

    client = FoundryChatClient(credential=AzureCliCredential())
    project_client = client.project_client

    # Random suffix keeps concurrent or repeated runs from colliding on the name.
    toolbox_name = f"af-int-toolbox-{uuid4().hex[:12]}"
    created = await project_client.beta.toolboxes.create_version(
        name=toolbox_name,
        tools=[CodeInterpreterTool()],
        description=f"{toolbox_name} integration test",
    )
    try:
        # Checked inside the ``try`` so that a failure here still triggers cleanup.
        assert isinstance(created, ToolboxVersionObject)

        toolbox_default = await client.get_toolbox(toolbox_name)
        assert toolbox_default.name == toolbox_name
        assert toolbox_default.tools, "Default-version fetch returned no tools"

        toolbox_pinned = await client.get_toolbox(toolbox_name, version=created.version)
        assert toolbox_pinned.version == created.version
        assert toolbox_pinned.tools

        agent = Agent(
            client=client,
            instructions="You are a test agent.",
            tools=toolbox_pinned.tools,
        )
        assert len(agent.default_options["tools"]) == len(toolbox_pinned.tools)
    finally:
        await project_client.beta.toolboxes.delete(toolbox_name)
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
+132
View File
@@ -0,0 +1,132 @@
# agent-framework-hyperlight
Alpha Hyperlight-backed CodeAct integrations for Microsoft Agent Framework.
## Installation
```bash
pip install agent-framework-hyperlight --pre
```
This package depends on `hyperlight-sandbox`, the packaged Python guest, and the
Wasm backend package on supported platforms. If the backend is not published for
your current platform yet, `execute_code` will fail at runtime when it tries to
create the sandbox.
## Quick start
### Context provider (recommended)
Use `HyperlightCodeActProvider` to automatically inject the `execute_code` tool
and CodeAct instructions into every agent run. Tools registered on the provider
are available inside the sandbox via `call_tool(...)` but are **not** exposed as
direct agent tools.
```python
from agent_framework import Agent, tool
from agent_framework_hyperlight import HyperlightCodeActProvider
@tool
def compute(operation: str, a: float, b: float) -> float:
"""Perform a math operation."""
ops = {"add": a + b, "subtract": a - b, "multiply": a * b, "divide": a / b}
return ops[operation]
codeact = HyperlightCodeActProvider(
tools=[compute],
approval_mode="never_require",
)
agent = Agent(
client=client,
name="CodeActAgent",
instructions="You are a helpful assistant.",
context_providers=[codeact],
)
result = await agent.run("Multiply 6 by 7 using execute_code.")
```
### Standalone tool
Use `HyperlightExecuteCodeTool` directly when you want full control over how the
tool is added to the agent. This is useful when mixing sandbox tools with
direct-only tools on the same agent. The example below reuses the `compute` tool
defined in the context provider example above.
```python
from agent_framework import Agent, tool
from agent_framework_hyperlight import HyperlightExecuteCodeTool
@tool
def send_email(to: str, subject: str, body: str) -> str:
"""Send an email (direct-only, not available inside the sandbox)."""
return f"Email sent to {to}"
execute_code = HyperlightExecuteCodeTool(
tools=[compute],
approval_mode="never_require",
)
agent = Agent(
client=client,
name="MixedToolsAgent",
instructions="You are a helpful assistant.",
tools=[send_email, execute_code],
)
```
### Manual static wiring
For fixed configurations where provider lifecycle overhead is unnecessary, build
the CodeAct instructions once and pass them to the agent at construction time:
```python
execute_code = HyperlightExecuteCodeTool(
tools=[compute],
approval_mode="never_require",
)
codeact_instructions = execute_code.build_instructions(tools_visible_to_model=False)
agent = Agent(
client=client,
name="StaticWiringAgent",
instructions=f"You are a helpful assistant.\n\n{codeact_instructions}",
tools=[execute_code],
)
```
### File mounts and network access
Mount host directories into the sandbox and allow outbound HTTP to specific
domains:
```python
from agent_framework_hyperlight import HyperlightCodeActProvider, FileMount
codeact = HyperlightCodeActProvider(
tools=[compute],
file_mounts=[
"/host/data", # shorthand — same path in sandbox
("/host/models", "/sandbox/models"), # explicit host → sandbox mapping
FileMount("/host/config", "/sandbox/config"), # named tuple
],
allowed_domains=[
"api.github.com", # all methods
("internal.api.example.com", "GET"), # GET only
],
)
```
## Notes
- This package is intentionally separate from `agent-framework-core` so CodeAct
usage and installation remain optional.
- Alpha-package samples live under `packages/hyperlight/samples/`.
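- `file_mounts` accepts a single string shorthand, an explicit `(host_path,
mount_path)` pair, or a `FileMount` named tuple. The host-side path in the
explicit forms may be a `str` or `Path`. Use the explicit two-value form when
the host path differs from the sandbox path. Mount paths are normalized to be
relative to `/input` inside the sandbox; for example, a mount path of
`"/sandbox/models"` is exposed as `/input/sandbox/models`.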
- `allowed_domains` accepts a single string target such as `"github.com"` to
allow all backend-supported methods, an explicit `(target, method_or_methods)`
tuple such as `("github.com", "GET")`, or an `AllowedDomain` named tuple.
@@ -0,0 +1,24 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import importlib.metadata
from ._execute_code_tool import HyperlightExecuteCodeTool
from ._provider import HyperlightCodeActProvider
from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput
# Look up the installed distribution's version under this module's ``__name__``;
# fall back to a "0.0.0" placeholder when no package metadata is found.
try:
    __version__ = importlib.metadata.version(__name__)
except importlib.metadata.PackageNotFoundError:
    __version__ = "0.0.0"

# Names exported by ``from <package> import *``.
__all__ = [
    "AllowedDomain",
    "AllowedDomainInput",
    "FileMount",
    "FileMountInput",
    "HyperlightCodeActProvider",
    "HyperlightExecuteCodeTool",
    "__version__",
]
@@ -0,0 +1,865 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import ast
import asyncio
import copy
import mimetypes
import shutil
import threading
import time
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from tempfile import TemporaryDirectory
from typing import Annotated, Any, Protocol, TypeGuard, cast
from urllib.parse import urlparse
from agent_framework import Content, FunctionTool
from agent_framework._tools import ApprovalMode, normalize_tools
from pydantic import BaseModel, Field
from ._instructions import build_codeact_instructions, build_execute_code_description
from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountHostPath, FileMountInput
# NOTE(review): presumably the default sandbox backend and guest module identifiers;
# their use sites are not visible in this part of the file — confirm.
DEFAULT_HYPERLIGHT_BACKEND = "wasm"
DEFAULT_HYPERLIGHT_MODULE = "python_guest.path"
# Description attached to the ``code`` field of ``_ExecuteCodeInput``.
EXECUTE_CODE_INPUT_DESCRIPTION = "Python code to execute in an isolated Hyperlight sandbox."
# ``_parse_output_files`` re-scans for output files at most this many times,
# sleeping this many seconds between attempts.
OUTPUT_FILE_RETRY_ATTEMPTS = 10
OUTPUT_FILE_RETRY_DELAY_SECONDS = 0.1
# Pydantic model with a single ``code`` string field.
# NOTE(review): presumably the input schema of the ``execute_code`` tool — confirm at the
# tool definition. Documented with ``#`` comments rather than a docstring, since a class
# docstring on a pydantic model may be emitted as the schema description.
class _ExecuteCodeInput(BaseModel):
    # The field's schema description is EXECUTE_CODE_INPUT_DESCRIPTION.
    code: Annotated[str, Field(description=EXECUTE_CODE_INPUT_DESCRIPTION)]
@dataclass(frozen=True, slots=True)
class _StoredFileMount:
    """File mount as produced by ``_normalize_file_mount_input``."""

    # Host-side path, resolved by ``_resolve_existing_path``.
    host_path: Path
    # Mount path as returned by ``_normalize_mount_path`` (relative, "/"-joined).
    mount_path: str
@dataclass(frozen=True, slots=True)
class _NormalizedFileMount:
    """File mount entry carried by ``_RunConfig.file_mounts``, with a signature of the host path."""

    host_path: Path
    mount_path: str
    # Takes part in ``_RunConfig.cache_key``. The element type matches the return type of
    # ``_path_tree_signature``; presumably that is where it comes from — confirm at the
    # construction site.
    path_signature: tuple[tuple[str, int, int], ...]
@dataclass(frozen=True, slots=True)
class _RunConfig:
backend: str
module: str | None
module_path: str | None
approval_mode: ApprovalMode
tools: tuple[FunctionTool, ...]
workspace_root: Path | None
workspace_signature: tuple[tuple[str, int, int], ...]
file_mounts: tuple[_NormalizedFileMount, ...]
allowed_domains: tuple[AllowedDomain, ...]
@property
def mounted_paths(self) -> tuple[str, ...]:
return tuple(_display_mount_path(mount.mount_path) for mount in self.file_mounts)
@property
def filesystem_enabled(self) -> bool:
return self.workspace_root is not None or bool(self.file_mounts)
def cache_key(self) -> tuple[Any, ...]:
return (
self.backend,
self.module,
self.module_path,
self.approval_mode,
tuple((tool_obj.name, id(tool_obj)) for tool_obj in self.tools),
str(self.workspace_root) if self.workspace_root is not None else None,
self.workspace_signature,
tuple((mount.mount_path, str(mount.host_path), mount.path_signature) for mount in self.file_mounts),
tuple((allowed_domain.target, allowed_domain.methods) for allowed_domain in self.allowed_domains),
)
class SandboxRuntime(Protocol):
    """Structural interface for objects that can execute code under a ``_RunConfig``."""

    def execute(self, *, config: _RunConfig, code: str) -> list[Content]:
        """Run ``code`` using ``config`` and return the resulting content items."""
        ...
@dataclass
class _SandboxEntry:
    """Groups a sandbox object with its snapshot, optional temporary directories, and a lock."""

    # NOTE(review): the field descriptions below are inferred from names and types only;
    # the code that builds and uses these entries is outside this part of the file.
    sandbox: Any
    snapshot: Any
    # Temporary directories; ``None`` presumably means the respective directory is not in use.
    input_dir: TemporaryDirectory[str] | None
    output_dir: TemporaryDirectory[str] | None
    # Re-entrant lock; presumably serializes access to this entry — confirm at use sites.
    lock: threading.RLock
def _load_sandbox_class() -> type[Any]:
try:
from hyperlight_sandbox import Sandbox
except ModuleNotFoundError as exc:
raise ModuleNotFoundError(
"Hyperlight support requires `hyperlight-sandbox`, `hyperlight-sandbox-python-guest`, "
"and a compatible backend package such as `hyperlight-sandbox-backend-wasm`."
) from exc
return Sandbox
def _passthrough_result_parser(result: Any) -> str:
return repr(result)
def _collect_tools(*tool_groups: Any) -> list[FunctionTool]:
    """Flatten tool groups into a list of ``FunctionTool`` objects with unique names.

    Each group is passed through ``normalize_tools``. Entries that are not
    ``FunctionTool`` instances, and any tool named ``"execute_code"``, are skipped.
    When a name occurs more than once the last occurrence wins and takes the
    position of that last occurrence.
    """
    collected: dict[str, FunctionTool] = {}
    for group in tool_groups:
        for candidate in normalize_tools(group):
            if isinstance(candidate, FunctionTool) and candidate.name != "execute_code":
                # Remove an earlier entry first so the re-inserted key moves to the end.
                collected.pop(candidate.name, None)
                collected[candidate.name] = candidate
    return [*collected.values()]
def _resolve_execute_code_approval_mode(
*,
base_approval_mode: ApprovalMode,
tools: Sequence[FunctionTool],
) -> ApprovalMode:
if base_approval_mode == "always_require":
return "always_require"
if any(tool_obj.approval_mode == "always_require" for tool_obj in tools):
return "always_require"
return "never_require"
def _resolve_existing_path(value: str | Path) -> Path:
return Path(value).expanduser().resolve(strict=True)
def _resolve_workspace_root(value: str | Path | None) -> Path | None:
    """Resolve an optional workspace root to an existing directory.

    Returns ``None`` when ``value`` is ``None``.

    Raises:
        ValueError: If the resolved path is not a directory.
    """
    if value is None:
        return None
    workspace = _resolve_existing_path(value)
    if workspace.is_dir():
        return workspace
    raise ValueError("workspace_root must point to an existing directory.")
def _is_file_mount_pair(value: Any) -> TypeGuard[FileMount | tuple[FileMountHostPath, str]]:
if not isinstance(value, tuple):
return False
value_tuple = cast(tuple[object, ...], value)
if len(value_tuple) != 2:
return False
host_path, mount_path = value_tuple
return isinstance(host_path, (str, Path)) and isinstance(mount_path, str)
def _normalize_file_mount_input(file_mount: FileMountInput) -> _StoredFileMount:
    """Convert a file-mount input into a ``_StoredFileMount``.

    A plain string is used as both host path and mount path; any other input
    supplies the host path at index 0 and the mount path at index 1. The host
    path is resolved with ``_resolve_existing_path`` and the mount path is
    normalized with ``_normalize_mount_path``.
    """
    raw_host: FileMountHostPath
    raw_mount: str
    if isinstance(file_mount, str):
        raw_host = raw_mount = file_mount
    else:
        raw_host, raw_mount = file_mount[0], file_mount[1]

    resolved_host = _resolve_existing_path(raw_host)
    normalized_mount = _normalize_mount_path(raw_mount)
    return _StoredFileMount(host_path=resolved_host, mount_path=normalized_mount)
def _normalize_domain(target: str) -> str:
candidate = target.strip()
if not candidate:
raise ValueError("Allowed domain entries must not be empty.")
parsed = urlparse(candidate if "://" in candidate else f"//{candidate}")
normalized = (parsed.netloc or parsed.path).strip().rstrip("/")
if not normalized:
raise ValueError(f"Could not normalize allowed domain entry: {target!r}.")
return normalized.lower()
def _normalize_http_method(method: str) -> str:
normalized = method.strip().upper()
if not normalized:
raise ValueError("HTTP method entries must not be empty.")
return normalized
def _normalize_http_methods(methods: str | Sequence[str] | None) -> tuple[str, ...] | None:
    """Normalize one or more HTTP methods into a sorted, de-duplicated tuple.

    ``None`` is returned unchanged. A single string is treated as one method.

    Raises:
        ValueError: If a sequence is given but contains no methods, or if any
            entry is rejected by ``_normalize_http_method``.
    """
    if methods is None:
        return None
    raw_methods: Sequence[str] = [methods] if isinstance(methods, str) else methods
    unique_methods = {_normalize_http_method(entry) for entry in raw_methods}
    if not unique_methods:
        raise ValueError("Allowed domain methods must not be empty when provided.")
    return tuple(sorted(unique_methods))
def _is_allowed_domain_pair(value: Any) -> TypeGuard[tuple[str, str | Sequence[str]]]:
    """Return ``True`` for a plain 2-tuple ``(target, methods)``.

    ``target`` must be a ``str`` and ``methods`` a ``str`` or other ``Sequence``.
    ``AllowedDomain`` instances are rejected even though they are tuples.
    """
    if isinstance(value, AllowedDomain) or not isinstance(value, tuple):
        return False
    pair = cast(tuple[object, ...], value)
    if len(pair) != 2:
        return False
    target, methods = pair
    # ``str`` is itself a ``Sequence``; it is listed explicitly to mirror the accepted forms.
    return isinstance(target, str) and isinstance(methods, (str, Sequence))
def _normalize_allowed_domain_input(allowed_domain: AllowedDomainInput) -> AllowedDomain:
    """Convert any accepted allowed-domain input into a normalized ``AllowedDomain``.

    A plain string becomes a target with ``methods=None``. An ``AllowedDomain``
    or a ``(target, methods)`` pair has both parts normalized.
    """
    if isinstance(allowed_domain, str):
        raw_target, raw_methods = allowed_domain, None
    elif isinstance(allowed_domain, AllowedDomain):
        raw_target, raw_methods = allowed_domain.target, allowed_domain.methods
    else:
        raw_target, raw_methods = allowed_domain

    # ``_normalize_http_methods`` passes ``None`` through unchanged.
    return AllowedDomain(
        target=_normalize_domain(raw_target),
        methods=_normalize_http_methods(raw_methods),
    )
def _allowed_domain_registration_targets(*, target: str, expand_missing_scheme: bool) -> tuple[str, ...]:
if not expand_missing_scheme or "://" in target:
return (target,)
return (f"http://{target}", f"https://{target}")
def _should_retry_allowed_domain_registration(
*,
error: RuntimeError,
allowed_domains: Sequence[AllowedDomain],
) -> bool:
message = str(error).lower()
return "invalid url for network permission" in message and any(
"://" not in domain.target for domain in allowed_domains
)
def _normalize_mount_path(mount_path: str) -> str:
raw_path = mount_path.strip().replace("\\", "/")
if not raw_path:
raise ValueError("mount_path must not be empty.")
pure_path = PurePosixPath(raw_path)
parts = [part for part in pure_path.parts if part not in {"", "/", "."}]
if parts and parts[0] == "input":
parts = parts[1:]
if any(part == ".." for part in parts):
raise ValueError("mount_path must stay within /input.")
if not parts:
raise ValueError("mount_path must point to a concrete path under /input.")
return "/".join(parts)
def _display_mount_path(mount_path: str) -> str:
return f"/input/{mount_path}"
def _path_tree_signature(path: Path) -> tuple[tuple[str, int, int], ...]:
if path.is_file():
stat = path.stat()
return ((path.name, int(stat.st_size), int(stat.st_mtime_ns)),)
entries: list[tuple[str, int, int]] = []
for candidate in sorted(path.rglob("*"), key=lambda value: value.as_posix()):
try:
stat = candidate.stat()
except FileNotFoundError:
continue
relative_path = candidate.relative_to(path).as_posix()
size = int(stat.st_size) if candidate.is_file() else 0
entries.append((relative_path, size, int(stat.st_mtime_ns)))
return tuple(entries)
def _copy_path(source: Path, destination: Path) -> None:
if source.is_dir():
destination.mkdir(parents=True, exist_ok=True)
for child in sorted(source.iterdir(), key=lambda value: value.name):
_copy_path(child, destination / child.name)
return
destination.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, destination)
def _populate_input_dir(*, config: _RunConfig, input_root: Path) -> None:
    """Copy the workspace contents and all file mounts into ``input_root``.

    Workspace children are copied first, in name order. File mounts are copied
    afterwards, each to ``input_root / mount_path``.
    """
    workspace = config.workspace_root
    if workspace is not None:
        workspace_children = sorted(workspace.iterdir(), key=lambda item: item.name)
        for entry in workspace_children:
            _copy_path(entry, input_root / entry.name)

    for file_mount in config.file_mounts:
        target = input_root / file_mount.mount_path
        _copy_path(file_mount.host_path, target)
def _create_file_content(file_path: Path, *, relative_path: str) -> Content:
    """Read ``file_path`` and wrap its bytes in a data ``Content``.

    The media type is guessed from the file name and defaults to
    ``application/octet-stream``. The ``path`` additional property is
    ``/output/<relative_path>``.
    """
    guessed_type, _encoding = mimetypes.guess_type(file_path.name)
    payload = file_path.read_bytes()
    return Content.from_data(
        data=payload,
        media_type=guessed_type or "application/octet-stream",
        additional_properties={"path": f"/output/{relative_path}"},
    )
def _normalize_output_relative_path(*, output_file: object, root: Path) -> str | None:
candidate_path = Path(str(output_file))
if candidate_path.is_absolute():
try:
return candidate_path.relative_to(root).as_posix()
except ValueError:
return None
raw_path = str(output_file).replace("\\", "/")
pure_path = PurePosixPath(raw_path)
parts = [part for part in pure_path.parts if part not in {"", "/", "."}]
if parts and parts[0] == "output":
parts = parts[1:]
if not parts or any(part == ".." for part in parts):
return None
return "/".join(parts)
def _collect_output_relative_paths(*, sandbox: Any, root: Path) -> set[str]:
relative_paths: set[str] = set()
if hasattr(sandbox, "get_output_files"):
try:
output_files = cast(Sequence[object], sandbox.get_output_files())
except Exception:
output_files = ()
for output_file in output_files:
if (relative_path := _normalize_output_relative_path(output_file=output_file, root=root)) is not None:
relative_paths.add(relative_path)
for host_path in root.rglob("*"):
if host_path.is_file():
relative_paths.add(host_path.relative_to(root).as_posix())
return relative_paths
def _parse_output_files(
    *,
    sandbox: Any,
    output_dir: TemporaryDirectory[str] | None,
    expect_output_files: bool,
) -> list[Content]:
    """Read the sandbox output files into ``Content`` items, retrying while incomplete.

    Returns an empty list when no output directory is configured. Otherwise each
    attempt collects the known relative paths and loads every file that exists.
    An attempt counts as incomplete when files were expected but none were
    found, when a listed path is not a file on the host, or when reading hits a
    ``PermissionError``. Incomplete attempts sleep and retry; the last attempt
    returns whatever could be read.
    """
    if output_dir is None:
        return []

    output_root = Path(output_dir.name)
    last_attempt = OUTPUT_FILE_RETRY_ATTEMPTS - 1
    for attempt_index in range(OUTPUT_FILE_RETRY_ATTEMPTS):
        discovered = _collect_output_relative_paths(sandbox=sandbox, root=output_root)
        incomplete = expect_output_files and not discovered
        loaded: list[Content] = []
        for relative in sorted(discovered):
            file_on_host = output_root.joinpath(*PurePosixPath(relative).parts)
            if file_on_host.is_file():
                try:
                    loaded.append(_create_file_content(file_on_host, relative_path=relative))
                except PermissionError:
                    incomplete = True
            else:
                incomplete = True
        if attempt_index == last_attempt or not incomplete:
            return loaded
        time.sleep(OUTPUT_FILE_RETRY_DELAY_SECONDS)
    return []
def _build_execution_contents(
    *,
    result: Any,
    sandbox: Any,
    output_dir: TemporaryDirectory[str] | None,
    code: str,
) -> list[Content]:
    """Translate a sandbox run result into the list of ``Content`` items to return.

    Stdout (if any) comes first, followed by the output files. A successful run
    then appends stderr (if any) and, when nothing at all was produced, a
    placeholder success message. A failed run appends an error item whose
    details are stderr or a generic fallback.
    """

    def _stream_text(attribute: str) -> str | None:
        # Missing or empty streams collapse to None; CRLF is normalized to LF.
        normalized = str(getattr(result, attribute, "") or "").replace("\r\n", "\n")
        return normalized if normalized else None

    succeeded = bool(getattr(result, "success", False))
    stdout_text = _stream_text("stdout")
    stderr_text = _stream_text("stderr")

    contents: list[Content] = []
    if stdout_text is not None:
        contents.append(Content.from_text(stdout_text, raw_representation=result))
    contents += _parse_output_files(
        sandbox=sandbox,
        output_dir=output_dir,
        expect_output_files="/output" in code,
    )

    if not succeeded:
        contents.append(
            Content.from_error(
                message="Execution error",
                error_details=stderr_text or "Unknown sandbox error",
                raw_representation=result,
            )
        )
        return contents

    if stderr_text is not None:
        contents.append(Content.from_text(stderr_text, raw_representation=result))
    if not contents:
        contents.append(Content.from_text("Code executed successfully without output."))
    return contents
def _make_sandbox_callback(tool_obj: FunctionTool) -> Callable[..., Any]:
    """Wrap a ``FunctionTool`` in a synchronous, keyword-only callback for the sandbox.

    The tool is shallow-copied so that assigning a result parser here does not
    modify the caller's tool object. The returned callback invokes the tool on a
    dedicated thread with its own event loop, re-raises any exception from that
    thread, and converts the resulting contents into plain values: text
    contents are parsed with ``ast.literal_eval`` (falling back to the raw text)
    and every other content is converted with ``to_dict()``. A single value is
    returned bare; otherwise the list of values is returned.
    """
    sandbox_tool = copy.copy(tool_obj)
    # Auto-assign a passthrough parser so the raw return value round-trips through
    # `ast.literal_eval` in the sandbox callback below. User-supplied parsers are
    # left in place so callers can customize how results are exposed to the guest.
    if sandbox_tool.result_parser is None:
        sandbox_tool.result_parser = _passthrough_result_parser

    def _callback(**kwargs: Any) -> Any:
        async def _invoke() -> list[Content]:
            return await sandbox_tool.invoke(arguments=kwargs)

        # FunctionTool.invoke() is always async. The real Hyperlight backend invokes
        # registered callbacks synchronously via FFI, so this must be a sync function.
        # We run the async call on a dedicated thread to avoid conflicts with any
        # event loop that may be running on the current thread.
        result_box: list[Any] = [None]
        error_box: list[BaseException] = []

        def _run() -> None:
            try:
                result_box[0] = asyncio.run(_invoke())
            except BaseException as exc:
                # Capture everything so the failure can be re-raised on the calling thread.
                error_box.append(exc)

        worker = threading.Thread(target=_run)
        worker.start()
        # Block until the tool finishes; the callback itself is synchronous.
        worker.join()
        if error_box:
            raise error_box[0]
        contents: list[Content] = result_box[0]
        values: list[Any] = []
        for content in contents:
            if content.type == "text" and content.text is not None:
                try:
                    # Recover Python literals (numbers, lists, dicts, ...) from their text form.
                    values.append(ast.literal_eval(content.text))
                except (SyntaxError, ValueError):
                    # Not a literal: hand the text through unchanged.
                    values.append(content.text)
                continue
            values.append(content.to_dict())
        # Unwrap single results so the guest receives the value rather than a one-item list.
        if len(values) == 1:
            return values[0]
        return values

    return _callback
def _clear_directory(output_dir: TemporaryDirectory[str] | None) -> None:
"""Remove all contents of the output directory without deleting the directory itself."""
if output_dir is None:
return
root = Path(output_dir.name)
for child in root.iterdir():
try:
if child.is_symlink() or child.is_file():
child.unlink()
elif child.is_dir():
shutil.rmtree(child, ignore_errors=True)
except (FileNotFoundError, PermissionError):
pass
class _SandboxRegistry:
    """Cache of configured sandboxes keyed by ``config.cache_key()``.

    Each entry bundles a sandbox, a snapshot taken right after configuration,
    the temporary input/output directories, and a per-entry lock. This class
    only ever adds entries; nothing in it removes them.
    """

    def __init__(self) -> None:
        # Maps a run-config cache key to its prepared sandbox entry.
        self._entries: dict[tuple[Any, ...], _SandboxEntry] = {}
        # Guards lookups and insertions in ``_entries``.
        self._entries_lock = threading.RLock()

    def execute(self, *, config: _RunConfig, code: str) -> list[Content]:
        """Execute code in a cached sandbox matching the given config.

        Entries are keyed by ``config.cache_key()``. Concurrent calls with the same
        key are serialized by the entry lock so they never race, but they share the
        same sandbox instance. For true parallel execution, use distinct provider
        instances or configs that produce different cache keys.
        """
        cache_key = config.cache_key()
        with self._entries_lock:
            entry = self._entries.get(cache_key)
            if entry is None:
                # First use of this configuration: build and cache the sandbox.
                entry = self._create_entry(config)
                self._entries[cache_key] = entry
        with entry.lock:
            # Reset the sandbox to its post-configuration snapshot and empty the
            # output directory so earlier runs do not leak into this one.
            entry.sandbox.restore(entry.snapshot)
            _clear_directory(entry.output_dir)
            result = entry.sandbox.run(code=code)
            return _build_execution_contents(
                result=result,
                sandbox=entry.sandbox,
                output_dir=entry.output_dir,
                code=code,
            )

    def _create_entry(self, config: _RunConfig) -> _SandboxEntry:
        """Create, configure, warm up, and snapshot a sandbox for ``config``.

        Raises:
            RuntimeError: When constructing the sandbox raises ``ImportError``,
                or when the warm-up run fails and is not eligible for the
                allowed-domain retry.
        """
        # Temporary directories exist only when filesystem access is enabled.
        input_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None
        output_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None
        if input_dir_handle is not None:
            _populate_input_dir(config=config, input_root=Path(input_dir_handle.name))
        sandbox_cls = _load_sandbox_class()

        def _create_sandbox() -> Any:
            try:
                return sandbox_cls(
                    backend=config.backend,
                    module=config.module,
                    module_path=config.module_path,
                    input_dir=input_dir_handle.name if input_dir_handle is not None else None,
                    output_dir=output_dir_handle.name if output_dir_handle is not None else None,
                )
            except ImportError as exc:
                raise RuntimeError(
                    "The selected Hyperlight backend is not installed or not supported on this platform. "
                    "Install a compatible backend package, such as `hyperlight-sandbox-backend-wasm`."
                ) from exc

        def _configure_sandbox(*, sandbox: Any, expand_missing_scheme: bool) -> None:
            # Register every managed tool as a synchronous host callback.
            for tool_obj in config.tools:
                sandbox.register_tool(tool_obj.name, _make_sandbox_callback(tool_obj))
            # Register each allow-listed target; one configured domain may map to
            # several registration targets.
            for allowed_domain in config.allowed_domains:
                for target in _allowed_domain_registration_targets(
                    target=allowed_domain.target,
                    expand_missing_scheme=expand_missing_scheme,
                ):
                    sandbox.allow_domain(
                        target,
                        methods=list(allowed_domain.methods) if allowed_domain.methods is not None else None,
                    )

        sandbox = _create_sandbox()
        _configure_sandbox(sandbox=sandbox, expand_missing_scheme=False)
        try:
            # Run a trivial program before snapshotting.
            # NOTE(review): presumably this forces guest initialization — confirm.
            sandbox.run("None")
        except RuntimeError as exc:
            if not _should_retry_allowed_domain_registration(error=exc, allowed_domains=config.allowed_domains):
                raise
            # Retry once on a fresh sandbox with scheme-less targets expanded.
            sandbox = _create_sandbox()
            _configure_sandbox(sandbox=sandbox, expand_missing_scheme=True)
            sandbox.run("None")
        snapshot = sandbox.snapshot()
        return _SandboxEntry(
            sandbox=sandbox,
            snapshot=snapshot,
            input_dir=input_dir_handle,
            output_dir=output_dir_handle,
            lock=threading.RLock(),
        )
class HyperlightExecuteCodeTool(FunctionTool):
    """Execute Python code inside a Hyperlight sandbox.

    The tool is registered under the name ``execute_code``. It keeps a mutable
    set of managed sandbox tools, file mounts, and allowed outbound domains,
    and derives its description and approval mode from that state.
    """

    def __init__(
        self,
        *,
        tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None,
        approval_mode: ApprovalMode | None = None,
        workspace_root: str | Path | None = None,
        file_mounts: FileMountInput | Sequence[FileMountInput] | None = None,
        allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None,
        backend: str = DEFAULT_HYPERLIGHT_BACKEND,
        module: str | None = DEFAULT_HYPERLIGHT_MODULE,
        module_path: str | None = None,
        _registry: SandboxRuntime | None = None,
    ) -> None:
        """Create the tool and register any initial tools, mounts, and domains.

        Args:
            tools: Tools callable from inside the sandbox.
            approval_mode: Base approval mode; defaults to ``"never_require"``.
            workspace_root: Optional host directory to expose to the sandbox.
            file_mounts: Extra host files or directories to mount.
            allowed_domains: Outbound network allow-list entries.
            backend: Sandbox backend passed to the sandbox class.
            module: Guest module passed to the sandbox class.
            module_path: Guest module path passed to the sandbox class.
            _registry: Sandbox runtime to use; a new ``_SandboxRegistry`` is
                created when omitted.
        """
        super().__init__(
            name="execute_code",
            description=EXECUTE_CODE_INPUT_DESCRIPTION,
            approval_mode="never_require",
            func=self._run_code,
            input_model=_ExecuteCodeInput,
        )
        # Guards the mutable tool, mount, and domain collections below.
        self._state_lock = threading.RLock()
        self._registry = _registry or _SandboxRegistry()
        self._default_approval_mode: ApprovalMode = approval_mode or "never_require"
        self._workspace_root = _resolve_workspace_root(workspace_root)
        self._backend: str = backend
        self._module: str | None = module
        self._module_path: str | None = module_path
        self._managed_tools: list[FunctionTool] = []
        # Keyed by normalized mount path, so re-adding a mount path replaces it.
        self._file_mounts: dict[str, _StoredFileMount] = {}
        # Keyed by normalized target, so re-adding a target replaces it.
        self._allowed_domains: dict[str, AllowedDomain] = {}
        if tools is not None:
            self.add_tools(tools)
        if file_mounts is not None:
            self.add_file_mounts(file_mounts)
        if allowed_domains is not None:
            self.add_allowed_domains(allowed_domains)
        self._refresh_approval_mode()

    @property
    def description(self) -> str:
        """Return the description built from the current sandbox state."""
        # ``_state_lock`` is assigned only after ``super().__init__`` returns, so the
        # property can be read before this object is fully set up; fall back to the
        # stored or static description in that case.
        state_lock = getattr(self, "_state_lock", None)
        if state_lock is None:
            return str(self.__dict__.get("description", EXECUTE_CODE_INPUT_DESCRIPTION))
        with state_lock:
            allowed_domains = sorted(self._allowed_domains.values(), key=lambda value: value.target)
            return build_execute_code_description(
                tools=self._managed_tools,
                filesystem_enabled=self._workspace_root is not None or bool(self._file_mounts),
                workspace_enabled=self._workspace_root is not None,
                mounted_paths=[_display_mount_path(mount.mount_path) for mount in self._file_mounts.values()],
                allowed_domains=allowed_domains,
            )

    @description.setter
    def description(self, value: str) -> None:
        # Store the assigned value in the instance dict; the getter only uses it
        # while ``_state_lock`` is not yet available.
        self.__dict__["description"] = value

    def add_tools(
        self,
        tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]],
    ) -> None:
        """Add sandbox-managed tools to this execute_code surface."""
        with self._state_lock:
            combined_tools = _collect_tools(self._managed_tools, tools)
            self._managed_tools = combined_tools
            # The effective approval mode depends on the managed tools.
            self._refresh_approval_mode()

    def get_tools(self) -> list[FunctionTool]:
        """Return the currently managed sandbox tools."""
        with self._state_lock:
            # Return a copy so callers cannot mutate the internal list.
            return list(self._managed_tools)

    def remove_tool(self, name: str) -> None:
        """Remove one managed sandbox tool by name.

        Raises:
            KeyError: If no managed tool has that name.
        """
        with self._state_lock:
            remaining_tools = [tool_obj for tool_obj in self._managed_tools if tool_obj.name != name]
            # An unchanged length means nothing matched the requested name.
            if len(remaining_tools) == len(self._managed_tools):
                raise KeyError(f"No managed tool named {name!r} is registered.")
            self._managed_tools = remaining_tools
            self._refresh_approval_mode()

    def clear_tools(self) -> None:
        """Remove all managed sandbox tools."""
        with self._state_lock:
            self._managed_tools = []
            self._refresh_approval_mode()

    def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None:
        """Add one or more file mounts under `/input`.

        A single string uses the same relative path on the host and in the sandbox.
        Use a two-string tuple or `FileMount` when those paths differ.
        """
        # Distinguish a single mount (string or pair) from a sequence of mounts.
        if isinstance(file_mounts, str) or _is_file_mount_pair(file_mounts):
            normalized_mounts = [_normalize_file_mount_input(file_mounts)]
        else:
            normalized_mounts = [
                _normalize_file_mount_input(mount) for mount in cast(Sequence[FileMountInput], file_mounts)
            ]
        with self._state_lock:
            for mount in normalized_mounts:
                self._file_mounts[mount.mount_path] = mount

    def get_file_mounts(self) -> list[FileMount]:
        """Return the configured file mounts."""
        with self._state_lock:
            return [
                FileMount(host_path=mount.host_path, mount_path=_display_mount_path(mount.mount_path))
                for mount in self._file_mounts.values()
            ]

    def remove_file_mount(self, mount_path: str) -> None:
        """Remove one file mount by its sandbox path.

        Raises:
            KeyError: If no mount exists for the normalized path.
        """
        normalized_mount_path = _normalize_mount_path(mount_path)
        with self._state_lock:
            if normalized_mount_path not in self._file_mounts:
                raise KeyError(f"No file mount exists for {mount_path!r}.")
            del self._file_mounts[normalized_mount_path]

    def clear_file_mounts(self) -> None:
        """Remove all configured file mounts."""
        with self._state_lock:
            self._file_mounts.clear()

    def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None:
        """Add one or more outbound allow-list entries."""
        # Distinguish a single entry (string, AllowedDomain, or pair) from a sequence.
        if isinstance(domains, (str, AllowedDomain)) or _is_allowed_domain_pair(domains):
            normalized_domains = [_normalize_allowed_domain_input(domains)]
        else:
            normalized_domains = [
                _normalize_allowed_domain_input(domain) for domain in cast(Sequence[AllowedDomainInput], domains)
            ]
        with self._state_lock:
            for normalized_domain in normalized_domains:
                self._allowed_domains[normalized_domain.target] = normalized_domain

    def get_allowed_domains(self) -> list[AllowedDomain]:
        """Return the configured outbound allow-list entries, sorted by target."""
        with self._state_lock:
            return sorted(self._allowed_domains.values(), key=lambda value: value.target)

    def remove_allowed_domain(self, domain: str) -> None:
        """Remove one outbound allow-list entry.

        Raises:
            KeyError: If no entry exists for the normalized domain.
        """
        normalized_domain = _normalize_domain(domain)
        with self._state_lock:
            if normalized_domain not in self._allowed_domains:
                raise KeyError(f"No allowed domain exists for {domain!r}.")
            del self._allowed_domains[normalized_domain]

    def clear_allowed_domains(self) -> None:
        """Remove all outbound allow-list entries."""
        with self._state_lock:
            self._allowed_domains.clear()

    def build_instructions(self, *, tools_visible_to_model: bool) -> str:
        """Build the current CodeAct instructions for this execute_code surface."""
        config = self._build_run_config()
        return build_codeact_instructions(
            tools=config.tools,
            tools_visible_to_model=tools_visible_to_model,
            filesystem_enabled=config.filesystem_enabled,
        )

    def create_run_tool(self) -> HyperlightExecuteCodeTool:
        """Create a run-scoped snapshot of this execute_code surface.

        The new tool receives copies of the current tools, mounts, and domains
        but shares this tool's registry.
        """
        file_mounts = self.get_file_mounts()
        allowed_domains = self.get_allowed_domains()
        return HyperlightExecuteCodeTool(
            tools=self.get_tools(),
            approval_mode=self._default_approval_mode,
            workspace_root=self._workspace_root,
            # Empty collections are passed as None so the constructor skips them.
            file_mounts=file_mounts or None,
            allowed_domains=allowed_domains or None,
            backend=self._backend,
            module=self._module,
            module_path=self._module_path,
            _registry=self._registry,
        )

    def build_serializable_state(self) -> dict[str, Any]:
        """Return a JSON-serializable snapshot of the effective run state."""
        config = self._build_run_config()
        return {
            "backend": config.backend,
            "module": config.module,
            "module_path": config.module_path,
            "approval_mode": config.approval_mode,
            "tool_names": [tool_obj.name for tool_obj in config.tools],
            "filesystem_enabled": config.filesystem_enabled,
            "workspace_root": str(config.workspace_root) if config.workspace_root is not None else None,
            "file_mounts": [
                {
                    "host_path": str(mount.host_path),
                    "mount_path": _display_mount_path(mount.mount_path),
                }
                for mount in config.file_mounts
            ],
            "network_enabled": bool(config.allowed_domains),
            "allowed_domains": [
                {
                    "target": allowed_domain.target,
                    "methods": list(allowed_domain.methods) if allowed_domain.methods is not None else None,
                }
                for allowed_domain in config.allowed_domains
            ],
        }

    def to_dict(self, *, exclude: set[str] | None = None, exclude_none: bool = True) -> dict[str, Any]:
        """Serialize the tool after storing the current dynamic description."""
        # NOTE(review): presumably the base serializer reads ``description`` from the
        # instance dict, so the computed value is materialized there first — confirm.
        self.__dict__["description"] = self.description
        return super().to_dict(exclude=exclude, exclude_none=exclude_none)

    def _refresh_approval_mode(self) -> None:
        """Recompute ``approval_mode`` from the base mode and the managed tools."""
        self.approval_mode = _resolve_execute_code_approval_mode(
            base_approval_mode=self._default_approval_mode,
            tools=self._managed_tools,
        )

    def _build_run_config(self) -> _RunConfig:
        """Snapshot the current state into an immutable ``_RunConfig``.

        The config includes filesystem signatures of the workspace root and of
        every mounted host path, computed with ``_path_tree_signature``.
        """
        with self._state_lock:
            # Copy the mutable collections into tuples while holding the lock.
            managed_tools = tuple(self._managed_tools)
            workspace_root = self._workspace_root
            stored_mounts = tuple(self._file_mounts.values())
            allowed_domains = tuple(sorted(self._allowed_domains.values(), key=lambda value: value.target))
            approval_mode = _resolve_execute_code_approval_mode(
                base_approval_mode=self._default_approval_mode,
                tools=managed_tools,
            )
        workspace_signature = _path_tree_signature(workspace_root) if workspace_root is not None else ()
        normalized_mounts = tuple(
            _NormalizedFileMount(
                host_path=mount.host_path,
                mount_path=mount.mount_path,
                path_signature=_path_tree_signature(mount.host_path),
            )
            for mount in stored_mounts
        )
        return _RunConfig(
            backend=self._backend,
            module=self._module,
            module_path=self._module_path,
            approval_mode=approval_mode,
            tools=managed_tools,
            workspace_root=workspace_root,
            workspace_signature=workspace_signature,
            file_mounts=normalized_mounts,
            allowed_domains=allowed_domains,
        )

    async def _run_code(self, *, code: str) -> list[Content]:
        """Run ``code`` through the registry on a worker thread."""
        config = self._build_run_config()
        # The registry's execute() is synchronous; keep it off the event loop.
        return await asyncio.to_thread(self._registry.execute, config=config, code=code)
@@ -0,0 +1,139 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
from collections.abc import Sequence
from agent_framework import FunctionTool
from ._types import AllowedDomain
def _format_tool_summaries(tools: Sequence[FunctionTool]) -> str:
if not tools:
return "- No tools are currently registered inside the sandbox."
lines: list[str] = []
for tool_obj in tools:
parameters = tool_obj.parameters().get("properties", {})
parameter_names = [name for name in parameters if isinstance(name, str)]
parameter_summary = ", ".join(parameter_names) if parameter_names else "none"
description = str(tool_obj.description or "").strip() or "No description provided."
lines.append(f"- `{tool_obj.name}`: {description} Parameters: {parameter_summary}.")
return "\n".join(lines)
def _format_filesystem_capabilities(
*,
filesystem_enabled: bool,
workspace_enabled: bool,
mounted_paths: Sequence[str],
) -> str:
if not filesystem_enabled:
return "Filesystem access is unavailable because no workspace root or file mounts are configured."
lines = ["Filesystem access is enabled."]
lines.append("Read files from `/input`.")
lines.append("Write generated artifacts to `/output`; returned files will be attached to the tool result.")
if workspace_enabled:
lines.append("The configured workspace root is available under `/input/`.")
if mounted_paths:
lines.append("Additional mounted paths:")
lines.extend(f"- `{mounted_path}`" for mounted_path in mounted_paths)
elif not workspace_enabled:
lines.append("No workspace root or explicit file mounts are currently configured.")
return "\n".join(lines)
def _format_network_capabilities(
*,
allowed_domains: Sequence[AllowedDomain],
) -> str:
if not allowed_domains:
return "Outbound network access is unavailable because no allow-listed targets are configured."
lines = ["Outbound network access is allowed only for these configured targets:"]
for allowed_domain in allowed_domains:
methods_text = (
", ".join(allowed_domain.methods) if allowed_domain.methods else "all methods allowed by the backend"
)
lines.append(f"- `{allowed_domain.target}`: {methods_text}.")
return "\n".join(lines)
def build_codeact_instructions(
    *,
    tools: Sequence[FunctionTool],
    tools_visible_to_model: bool,
    filesystem_enabled: bool = False,
) -> str:
    """Assemble the CodeAct instruction text for the current sandbox state.

    The text always explains how to surface results with ``print(...)``; the
    ``/output`` hint is appended only when the filesystem is enabled, and the
    closing note depends on whether tools are also exposed to the model directly.
    """
    if tools_visible_to_model:
        usage_note = (
            "Some tools may also appear directly, but prefer `execute_code` whenever you need to combine Python "
            "control flow with sandbox tool calls."
        )
    else:
        usage_note = "Provider-owned sandbox tools are not exposed separately; use `execute_code` when you need them."

    output_parts = [
        "To surface results from `execute_code`, end the code with `print(...)`; the sandbox does not "
        "return the value of the last expression."
    ]
    if filesystem_enabled:
        output_parts.append(
            " For larger artifacts, write them to `/output/<filename>` instead — returned files will be "
            "attached to the tool result."
        )
    output_note = "".join(output_parts)

    return f"""You have one primary tool: execute_code.
Prefer one execute_code call per request when possible.
Its tool description contains the current `call_tool(...)` guidance, sandbox
tool registry, and capability limits.
{output_note}
{usage_note}
"""
def build_execute_code_description(
    *,
    tools: Sequence[FunctionTool],
    filesystem_enabled: bool,
    workspace_enabled: bool,
    mounted_paths: Sequence[str],
    allowed_domains: Sequence[AllowedDomain],
) -> str:
    """Compose the execute_code tool description from the current capabilities.

    The description lists the registered sandbox tools followed by the
    filesystem and network capability summaries.
    """
    filesystem_section = _format_filesystem_capabilities(
        filesystem_enabled=filesystem_enabled,
        workspace_enabled=workspace_enabled,
        mounted_paths=mounted_paths,
    )
    network_section = _format_network_capabilities(allowed_domains=allowed_domains)
    tools_section = _format_tool_summaries(tools)
    return f"""Execute Python in an isolated Hyperlight sandbox.
Inside the sandbox, `call_tool(name, **kwargs)` is available as a built-in for
registered host callbacks. Use the tool name as the first argument and keyword
arguments only. Do not pass a dict or any other positional arguments after the
tool name.
Registered sandbox tools:
{tools_section}
Filesystem capabilities:
{filesystem_section}
Network capabilities:
{network_section}
Prefer `execute_code` when you need to combine one or more `call_tool(...)`
calls with Python control flow, loops, or post-processing.
"""
@@ -0,0 +1,111 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
from collections.abc import Callable, Sequence
from pathlib import Path
from typing import Any
from agent_framework import AgentSession, ContextProvider, FunctionTool, SessionContext
from agent_framework._tools import ApprovalMode
from ._execute_code_tool import HyperlightExecuteCodeTool, SandboxRuntime
from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput
class HyperlightCodeActProvider(ContextProvider):
    """Inject a Hyperlight-backed CodeAct surface using provider-owned tools.

    The provider owns a single ``HyperlightExecuteCodeTool`` and forwards all
    tool, file-mount, and allowed-domain management to it. Before each run it
    derives a run-scoped tool from that state and adds it, together with the
    CodeAct instructions, to the session context.
    """

    # Source id used when the caller does not supply one.
    DEFAULT_SOURCE_ID = "hyperlight_codeact"

    def __init__(
        self,
        source_id: str = DEFAULT_SOURCE_ID,
        *,
        tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None,
        approval_mode: ApprovalMode | None = None,
        workspace_root: str | Path | None = None,
        file_mounts: FileMountInput | Sequence[FileMountInput] | None = None,
        allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None,
        backend: str = "wasm",
        module: str | None = "python_guest.path",
        module_path: str | None = None,
        _registry: SandboxRuntime | None = None,
    ) -> None:
        """Create the provider and its underlying execute_code tool.

        All keyword arguments are passed unchanged to
        ``HyperlightExecuteCodeTool``; ``source_id`` is passed to the base
        ``ContextProvider``.
        """
        super().__init__(source_id)
        self._execute_code_tool = HyperlightExecuteCodeTool(
            tools=tools,
            approval_mode=approval_mode,
            workspace_root=workspace_root,
            file_mounts=file_mounts,
            allowed_domains=allowed_domains,
            backend=backend,
            module=module,
            module_path=module_path,
            _registry=_registry,
        )

    def add_tools(
        self,
        tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]],
    ) -> None:
        """Add provider-owned sandbox tools."""
        self._execute_code_tool.add_tools(tools)

    def get_tools(self) -> list[FunctionTool]:
        """Return the provider-owned sandbox tools."""
        return self._execute_code_tool.get_tools()

    def remove_tool(self, name: str) -> None:
        """Remove one provider-owned sandbox tool by name."""
        self._execute_code_tool.remove_tool(name)

    def clear_tools(self) -> None:
        """Remove all provider-owned sandbox tools."""
        self._execute_code_tool.clear_tools()

    def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None:
        """Add provider-managed file mounts."""
        self._execute_code_tool.add_file_mounts(file_mounts)

    def get_file_mounts(self) -> list[FileMount]:
        """Return the provider-managed file mounts."""
        return self._execute_code_tool.get_file_mounts()

    def remove_file_mount(self, mount_path: str) -> None:
        """Remove one provider-managed file mount."""
        self._execute_code_tool.remove_file_mount(mount_path)

    def clear_file_mounts(self) -> None:
        """Remove all provider-managed file mounts."""
        self._execute_code_tool.clear_file_mounts()

    def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None:
        """Add provider-managed outbound allow-list entries."""
        self._execute_code_tool.add_allowed_domains(domains)

    def get_allowed_domains(self) -> list[AllowedDomain]:
        """Return the provider-managed outbound allow-list entries."""
        return self._execute_code_tool.get_allowed_domains()

    def remove_allowed_domain(self, domain: str) -> None:
        """Remove one provider-managed outbound allow-list entry."""
        self._execute_code_tool.remove_allowed_domain(domain)

    def clear_allowed_domains(self) -> None:
        """Remove all provider-managed outbound allow-list entries."""
        self._execute_code_tool.clear_allowed_domains()

    async def before_run(
        self,
        *,
        agent: Any,
        session: AgentSession | None,
        context: SessionContext,
        state: dict[str, Any],
    ) -> None:
        """Inject CodeAct instructions and a run-scoped execute_code tool before each run."""
        # Work from a per-run tool derived from the provider's tool.
        run_tool = self._execute_code_tool.create_run_tool()
        # Record the effective configuration under this provider's source id.
        state[self.source_id] = run_tool.build_serializable_state()
        # Only ``execute_code`` is added here, so the instructions are built with
        # ``tools_visible_to_model=False``.
        context.extend_instructions(self.source_id, run_tool.build_instructions(tools_visible_to_model=False))
        context.extend_tools(self.source_id, [run_tool])
@@ -0,0 +1,28 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
from collections.abc import Sequence
from pathlib import Path
from typing import NamedTuple, TypeAlias
class FileMount(NamedTuple):
    """Map a host file or directory into the sandbox input tree."""

    # File or directory on the host to expose to the sandbox.
    host_path: str | Path
    # Location of the mount inside the sandbox input tree.
    mount_path: str
# Accepted spellings of a host path.
FileMountHostPath: TypeAlias = str | Path
# Accepted ways to describe a mount: a single string, a (host_path, mount_path)
# tuple, or a FileMount.
FileMountInput: TypeAlias = str | tuple[FileMountHostPath, str] | FileMount
class AllowedDomain(NamedTuple):
    """Allow outbound requests to one target, optionally restricted to specific HTTP methods."""

    # Target that outbound requests may reach.
    target: str
    # Permitted HTTP methods; None leaves the methods unrestricted by this entry.
    methods: tuple[str, ...] | None = None
# Accepted ways to describe an allow-list entry: a target string, a
# (target, method-or-methods) tuple, or an AllowedDomain.
AllowedDomainInput: TypeAlias = str | tuple[str, str | Sequence[str]] | AllowedDomain
+101
View File
@@ -0,0 +1,101 @@
[project]
name = "agent-framework-hyperlight"
description = "Hyperlight CodeAct integrations for Microsoft Agent Framework."
authors = [{ name = "Microsoft", email = "af-support@microsoft.com"}]
readme = "README.md"
requires-python = ">=3.10"
version = "1.0.0a260409"
license-files = ["LICENSE"]
urls.homepage = "https://aka.ms/agent-framework"
urls.source = "https://github.com/microsoft/agent-framework/tree/main/python"
urls.release_notes = "https://github.com/microsoft/agent-framework/releases?q=tag%3Apython-1&expanded=true"
urls.issues = "https://github.com/microsoft/agent-framework/issues"
classifiers = [
"License :: OSI Approved :: MIT License",
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Typing :: Typed",
]
# The WASM backend is declared only for Linux x86_64 and Windows AMD64 on
# Python < 3.14 (see its environment marker). On any other platform no backend
# is pulled in by this package, so one has to be installed separately.
dependencies = [
"agent-framework-core>=1.0.0,<2",
"hyperlight-sandbox>=0.3.0,<0.4",
"hyperlight-sandbox-backend-wasm>=0.3.0,<0.4 ; ((sys_platform == 'linux' and platform_machine == 'x86_64') or (sys_platform == 'win32' and platform_machine == 'AMD64')) and python_version < '3.14'",
"hyperlight-sandbox-python-guest>=0.3.0,<0.4",
]
[tool.uv]
prerelease = "if-necessary-or-explicit"
environments = [
"sys_platform == 'linux'",
"sys_platform == 'win32'"
]
[tool.uv-dynamic-versioning]
fallback-version = "0.0.0"
[tool.pytest.ini_options]
testpaths = 'tests'
addopts = "-ra -q -r fEX"
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
filterwarnings = []
timeout = 120
markers = [
"integration: marks tests as integration tests that require external services",
]
[tool.ruff]
extend = "../../pyproject.toml"
[tool.ruff.lint.per-file-ignores]
"samples/**" = ["INP", "T201"]
"tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"]
[tool.coverage.run]
omit = [
"**/__init__.py"
]
[tool.pyright]
extends = "../../pyproject.toml"
include = ["agent_framework_hyperlight"]
exclude = ['tests']
[tool.mypy]
plugins = ['pydantic.mypy']
strict = true
python_version = "3.10"
ignore_missing_imports = true
disallow_untyped_defs = true
no_implicit_optional = true
check_untyped_defs = true
warn_return_any = true
show_error_codes = true
warn_unused_ignores = false
disallow_incomplete_defs = true
disallow_untyped_decorators = true
[tool.bandit]
targets = ["agent_framework_hyperlight"]
exclude_dirs = ["tests", "samples"]
[tool.poe]
executor.type = "uv"
include = "../../shared_tasks.toml"
[tool.poe.tasks.mypy]
help = "Run MyPy for this package."
cmd = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_hyperlight"
[tool.poe.tasks.test]
help = "Run the default unit test suite for this package."
cmd = 'pytest -m "not integration" --cov=agent_framework_hyperlight --cov-report=term-missing:skip-covered tests'
[build-system]
requires = ["flit-core >= 3.11,<4.0"]
build-backend = "flit_core.buildapi"
@@ -0,0 +1,43 @@
# Hyperlight CodeAct samples
These samples demonstrate the alpha `agent-framework-hyperlight` package.
## When to use which pattern
- **Provider pattern** (`codeact_context_provider.py`): Use when the tool
registry, file mounts, or network allow-list may change between runs, or when
you want the provider to manage CodeAct instructions and approval computation
automatically on every invocation. This is the recommended default for
production agents that need dynamic capability management or concurrent runs
sharing one provider.
- **Manual static wiring** (`codeact_manual_wiring.py`): Use when the sandbox
tool set and capabilities are fixed for the agent's lifetime. This pattern
builds instructions once, passes `execute_code` alongside direct tools in
`tools=`, and skips the per-run provider lifecycle entirely. Simpler setup,
but changes to the tool registry after construction will not update the
agent's instructions automatically.
- **Standalone tool** (`codeact_tool.py`): Use for the simplest integration
where `execute_code` is added directly to the agent tool list. The tool's own
description advertises `call_tool(...)` and the registered sandbox tools, so
no extra agent instructions are needed. Best for quick prototyping or when
CodeAct is just another tool alongside the agent's direct tools.
## Samples
- `codeact_context_provider.py` shows the provider-owned CodeAct model where the
agent only sees `execute_code` and sandbox tools are owned by
`HyperlightCodeActProvider`.
- `codeact_manual_wiring.py` shows static wiring where `HyperlightExecuteCodeTool`
and its instructions are passed directly to the `Agent` constructor.
- `codeact_tool.py` shows the standalone `HyperlightExecuteCodeTool` surface
where `execute_code` is added directly to the agent tool list.
Run the samples from the repository's `python` directory after installing the workspace dependencies:
```bash
uv run --directory packages/hyperlight python samples/codeact_context_provider.py
uv run --directory packages/hyperlight python samples/codeact_manual_wiring.py
uv run --directory packages/hyperlight python samples/codeact_tool.py
```
@@ -0,0 +1,253 @@
# Copyright (c) Microsoft. All rights reserved.
"""Benchmark CodeAct vs. traditional tool-calling for a multi-tool-call task.
This sample runs the same prompt against the same FoundryChatClient twice:
1. **Traditional tool-calling**: the five business tools are passed directly to
the agent, so the model calls each tool individually via the LLM tool-call
interface.
2. **CodeAct**: the same tools are registered on a HyperlightCodeActProvider
and the model sees a single ``execute_code`` tool that calls them from
inside the Hyperlight sandbox via ``call_tool(...)``.
The task (computing grand totals per user) naturally requires many tool calls
to complete. At the end, the sample prints elapsed time and token usage for
each run so the two approaches can be compared.
Run with:
cd python
uv run --directory packages/hyperlight python samples/codeact_benchmark.py
Required environment variables (loaded from ``.env`` if present):
FOUNDRY_PROJECT_ENDPOINT
FOUNDRY_MODEL
"""
from __future__ import annotations
import asyncio
import os
import time
from typing import Annotated, Any, Literal
from agent_framework import Agent, AgentResponse, UsageDetails
from agent_framework.foundry import FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from agent_framework_hyperlight import HyperlightCodeActProvider
load_dotenv()
# 1. Deterministic "business" data and tools.
# Fixed user directory. The `region` and `tier` values match the keys of
# `_TAX_RATES` and `_DISCOUNTS` below.
_USERS: list[dict[str, Any]] = [
    {"id": 1, "name": "Alice", "region": "EU", "tier": "gold"},
    {"id": 2, "name": "Bob", "region": "US", "tier": "silver"},
    {"id": 3, "name": "Charlie", "region": "US", "tier": "gold"},
    {"id": 4, "name": "Diana", "region": "APAC", "tier": "bronze"},
    {"id": 5, "name": "Evan", "region": "EU", "tier": "silver"},
    {"id": 6, "name": "Fiona", "region": "US", "tier": "gold"},
    {"id": 7, "name": "George", "region": "APAC", "tier": "gold"},
    {"id": 8, "name": "Hana", "region": "EU", "tier": "bronze"},
]

# Order lines keyed by user id (the `id` values used in `_USERS`).
_ORDERS: dict[int, list[dict[str, Any]]] = {
    1: [{"product": "Widget", "qty": 3, "unit_price": 9.99}, {"product": "Gadget", "qty": 1, "unit_price": 19.99}],
    2: [{"product": "Widget", "qty": 1, "unit_price": 9.99}],
    3: [{"product": "Gadget", "qty": 2, "unit_price": 19.99}, {"product": "Thingamajig", "qty": 4, "unit_price": 4.50}],
    4: [{"product": "Widget", "qty": 10, "unit_price": 9.99}],
    5: [{"product": "Gadget", "qty": 1, "unit_price": 19.99}],
    6: [{"product": "Widget", "qty": 2, "unit_price": 9.99}, {"product": "Thingamajig", "qty": 5, "unit_price": 4.50}],
    7: [{"product": "Gadget", "qty": 3, "unit_price": 19.99}],
    8: [{"product": "Thingamajig", "qty": 2, "unit_price": 4.50}],
}

# Discount fraction per customer tier.
_DISCOUNTS: dict[str, float] = {"gold": 0.20, "silver": 0.10, "bronze": 0.05}

# Tax fraction per region code.
_TAX_RATES: dict[str, float] = {"EU": 0.21, "US": 0.08, "APAC": 0.10}
def list_users() -> list[dict[str, Any]]:
    """Return all users as a list of dictionaries.

    Each entry has keys: id (int), name (str), region (str), tier (str).
    """
    # NOTE(review): the docstring is presumably surfaced to the model as the
    # tool description — confirm before rewording it.
    # Returns the module-level list itself, not a copy.
    return _USERS
def get_orders_for_user(
    user_id: Annotated[int, "The user id whose orders to retrieve."],
) -> list[dict[str, Any]]:
    """Return the user's orders as a list of dictionaries.

    Each entry has keys: product (str), qty (int), unit_price (float).
    """
    # NOTE(review): the docstring and the Annotated text are presumably tool
    # metadata shown to the model — confirm before rewording them.
    # An id without an entry in `_ORDERS` yields an empty list rather than an error.
    return _ORDERS.get(user_id, [])
def get_discount_rate(
    tier: Annotated[Literal["gold", "silver", "bronze"], "The customer tier."],
) -> float:
    """Return the discount rate as a float fraction (e.g. 0.2 for 20%)."""
    # Plain subscript lookup: a tier missing from `_DISCOUNTS` raises KeyError.
    return _DISCOUNTS[tier]
def get_tax_rate(
    region: Annotated[Literal["EU", "US", "APAC"], "The region code."],
) -> float:
    """Return the tax rate as a float fraction (e.g. 0.21 for 21%)."""
    # Plain subscript lookup: a region missing from `_TAX_RATES` raises KeyError.
    return _TAX_RATES[region]
def compute_line_total(
    qty: Annotated[int, "Line item quantity."],
    unit_price: Annotated[float, "Line item unit price."],
    discount_rate: Annotated[float, "Discount rate as a fraction (e.g. 0.2 for 20%)."],
    tax_rate: Annotated[float, "Tax rate as a fraction (e.g. 0.21 for 21%)."],
) -> float:
    """Compute a single order line total.

    Formula: qty * unit_price * (1 - discount_rate) * (1 + tax_rate), rounded to 2 decimals.
    """
    # Discount is applied before tax. The factors are multiplied left to right
    # in the same order as before so the floating-point result is unchanged.
    keep_fraction = 1.0 - discount_rate
    tax_multiplier = 1.0 + tax_rate
    return round(qty * unit_price * keep_fraction * tax_multiplier, 2)
# The five business tools; the same list is handed to both benchmark runners.
TOOLS = [list_users, get_orders_for_user, get_discount_rate, get_tax_rate, compute_line_total]
# 2. Structured output schema shared between both runs.
class UserTotal(BaseModel):
    """A user's grand total of all their orders."""

    # NOTE(review): the class docstring and Field descriptions presumably end up
    # in the schema sent as `response_format` — confirm before rewording them.
    user_id: int = Field(description="The user's id.")
    name: str = Field(description="The user's display name.")
    grand_total: float = Field(description="Sum of all line totals, rounded to 2 decimals.")
class UserGrandTotals(BaseModel):
    """Structured output schema for both runs."""

    # Passed as `response_format` by both runners, so the two results share one shape.
    results: list[UserTotal] = Field(description="One entry per user, sorted by grand_total descending.")
# System instructions shared by both agents.
INSTRUCTIONS = "You are a careful assistant. Use the provided tools for every lookup and computation."

# The single user prompt sent in both runs. It spells out the discount-then-tax
# formula and asks for every lookup and computation to go through the tools.
BENCHMARK_PROMPT = (
    "For every user in our system (there are 8 of them), compute the grand total of all their orders. "
    "Use the compute_line_total tool for each user's orders, after looking up the relevant discount and "
    "tax rates for that user. "
    "Use the provided tools for EVERY data lookup (users, orders, discount rates, tax rates) and for EVERY "
    "line-total computation via compute_line_total — do not invent values or hardcode any numbers. "
    "The total per order item should apply the discount first and then the tax "
    "(e.g. total = qty * unit_price * (1-discount) * (1+tax)). "
    "Return one entry per user, sorted by grand_total descending."
)
def get_client() -> FoundryChatClient:
    """Build the Foundry chat client used by the benchmark agents.

    Reads ``FOUNDRY_PROJECT_ENDPOINT`` and ``FOUNDRY_MODEL`` from the environment
    (a missing variable raises ``KeyError``) and passes a fresh
    ``AzureCliCredential`` as the credential.
    """
    endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]
    model_name = os.environ["FOUNDRY_MODEL"]
    return FoundryChatClient(
        project_endpoint=endpoint,
        model=model_name,
        credential=AzureCliCredential(),
    )
# 3. Two runners that share the same tools, prompt, and structured output schema.
async def _run_traditional() -> tuple[float, AgentResponse]:
    """Run the benchmark prompt with the tools passed directly to the agent.

    Returns:
        ``(elapsed_seconds, response)``. Only the ``agent.run`` call is timed;
        agent construction happens before the clock starts.
    """
    direct_agent = Agent(
        client=get_client(),
        name="TraditionalAgent",
        instructions=INSTRUCTIONS,
        tools=TOOLS,
        default_options={"response_format": UserGrandTotals},
    )
    began = time.perf_counter()
    response = await direct_agent.run(BENCHMARK_PROMPT)
    return time.perf_counter() - began, response
async def _run_codeact() -> tuple[float, AgentResponse]:
    """Run the benchmark prompt with the tools registered on a CodeAct provider.

    The agent receives the provider through ``context_providers`` instead of a
    direct ``tools`` list.

    Returns:
        ``(elapsed_seconds, response)``. Only the ``agent.run`` call is timed.
    """
    provider = HyperlightCodeActProvider(tools=TOOLS, approval_mode="never_require")
    codeact_agent = Agent(
        client=get_client(),
        name="CodeActAgent",
        instructions=INSTRUCTIONS,
        context_providers=[provider],
        default_options={"response_format": UserGrandTotals},
    )
    began = time.perf_counter()
    response = await codeact_agent.run(BENCHMARK_PROMPT)
    return time.perf_counter() - began, response
# 4. Report results side by side.
def _print_section(title: str) -> None:
    """Print ``title`` between two 70-character ``=`` rules, preceded by a blank line."""
    rule = "=" * 70
    # The leading empty item produces the blank line before the first rule.
    print("", rule, title, rule, sep="\n")
def _format_usage(usage: UsageDetails | None) -> str:
    """Render token counts as right-aligned, 6-wide columns.

    Returns the placeholder ``"usage=<none>"`` when no usage object is given.
    Missing or falsy counts are shown as ``0``.
    """
    if usage is None:
        return "usage=<none>"
    columns = (
        ("input", "input_token_count"),
        ("output", "output_token_count"),
        ("total", "total_token_count"),
    )
    return " ".join(f"{label}={usage.get(key) or 0:>6}" for label, key in columns)
def _print_results(result: AgentResponse) -> None:
    """Print one aligned row per user, or the raw response text when no structured value exists."""
    if result.value is None:
        # No parsed structured output: fall back to the plain text.
        print(result.text)
        return
    for entry in result.value.results:
        print(f" user_id={entry.user_id:>2} name={entry.name:<8} grand_total={entry.grand_total:>8.2f}")
async def main() -> None:
    """Run the benchmark and print a comparison.

    The traditional run executes first, then the CodeAct run; afterwards each
    run's timing, token usage and results are printed, followed by a summary
    of the relative differences.
    """

    def pct(new: float, old: float) -> str:
        # Signed relative change of `new` versus `old`; "n/a" when there is no baseline.
        if old == 0:
            return "n/a"
        delta = (new - old) / old * 100
        sign = "+" if delta >= 0 else ""
        return f"{sign}{delta:.1f}%"

    trad_time, trad_result = await _run_traditional()
    code_time, code_result = await _run_codeact()

    reports = (
        ("Traditional tool-calling", trad_time, trad_result),
        ("CodeAct (HyperlightCodeActProvider)", code_time, code_result),
    )
    for heading, seconds, outcome in reports:
        _print_section(heading)
        print(f"time={seconds:7.2f}s {_format_usage(outcome.usage_details)}")
        _print_results(outcome)

    _print_section("Comparison")
    # A missing usage object or a missing total both count as 0 tokens.
    trad_total = (trad_result.usage_details or {}).get("total_token_count") or 0
    code_total = (code_result.usage_details or {}).get("total_token_count") or 0
    print(f"time : traditional={trad_time:7.2f}s codeact={code_time:7.2f}s delta={pct(code_time, trad_time)}")
    print(f"tokens : traditional={trad_total:7d} codeact={code_total:7d} delta={pct(code_total, trad_total)}")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,188 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import asyncio
import logging
import os
from collections.abc import Awaitable, Callable
from typing import Annotated, Any, Literal
from agent_framework import Agent, FunctionInvocationContext, function_middleware, tool
from agent_framework.foundry import FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from agent_framework_hyperlight import HyperlightCodeActProvider
"""This sample demonstrates the provider-owned Hyperlight CodeAct flow.
The sample keeps `compute` and `fetch_data` off the direct agent tool surface and
registers them only with `HyperlightCodeActProvider`. The model therefore sees a
single `execute_code` tool and must call the provider-owned tools from inside
the sandbox with `call_tool(...)`.
"""
load_dotenv()
# ANSI escape sequences used to style the sample's console output.
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_GREEN = "\033[32m"
_DIM = "\033[2m"
# Resets all styling set by the sequences above.
_RESET = "\033[0m"
class _ColoredFormatter(logging.Formatter):
    """Log formatter that wraps every rendered record in the dim ANSI style.

    Keeps logger output visually quieter than the sample's own prints.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record`` with the base formatter and surround it with dim/reset codes."""
        rendered = super().format(record)
        return _DIM + rendered + _RESET
# Configure root logging at WARNING level, then swap the formatter of the first
# root handler for the dimmed one defined above.
logging.basicConfig(level=logging.WARNING)
logging.getLogger().handlers[0].setFormatter(
    _ColoredFormatter("[%(asctime)s] %(levelname)s: %(message)s"),
)
@function_middleware
async def log_function_calls(
    context: FunctionInvocationContext,
    call_next: Callable[[], Awaitable[None]],
) -> None:
    """Log tool calls, including readable execute_code blocks.

    Prints the call before delegating to ``call_next`` and prints the result
    together with the elapsed wall-clock time afterwards.

    Args:
        context: Invocation context; ``function.name``, ``arguments`` and
            ``result`` are read from it.
        call_next: Continuation that performs the actual invocation.
    """
    import time

    # Horizontal rule framing execute_code blocks, matching the sample output
    # documented at the bottom of this file. The previous `'' * 60` was an
    # empty string, so no rule was ever printed.
    rule = "─" * 60

    function_name = context.function.name
    # Anything that is not a plain dict is treated as "no arguments" for logging.
    arguments = context.arguments if isinstance(context.arguments, dict) else {}

    if function_name == "execute_code" and "code" in arguments:
        print(f"\n{_YELLOW}{rule}")
        print("▶ execute_code")
        print(f"{rule}{_RESET}")
        print(arguments["code"])
        print(f"{_YELLOW}{rule}{_RESET}")
    else:
        pairs = ", ".join(f"{name}={value!r}" for name, value in arguments.items())
        print(f"\n{_YELLOW}{function_name}({pairs}){_RESET}")

    start = time.perf_counter()
    await call_next()
    elapsed = time.perf_counter() - start

    result = context.result
    if function_name == "execute_code" and isinstance(result, list):
        # Show text items as stdout and error items as stderr; other items are skipped.
        for output in result:
            if output.type == "text" and output.text:
                print(f"{_GREEN}stdout:\n{output.text}{_RESET}")
            elif output.type == "error" and output.error_details:
                print(f"{_YELLOW}stderr:\n{output.error_details}{_RESET}")
    else:
        # Separate the name from the result; previously the two were printed fused together.
        print(f"{_YELLOW}{function_name} -> {result!r}{_RESET}")
    print(f"{_DIM} ({elapsed:.4f}s){_RESET}")
@tool(approval_mode="never_require")
def compute(
    operation: Annotated[
        Literal["add", "subtract", "multiply", "divide"],
        "Math operation: add, subtract, multiply, or divide.",
    ],
    a: Annotated[float, "First numeric operand."],
    b: Annotated[float, "Second numeric operand."],
) -> float:
    """Perform a math operation for sandboxed code."""
    # All four results are computed up front, then one is selected by name.
    total = a + b
    difference = a - b
    product = a * b
    # A falsy divisor yields infinity instead of raising.
    quotient = float("inf") if not b else a / b
    by_name = {
        "add": total,
        "subtract": difference,
        "multiply": product,
        "divide": quotient,
    }
    # An operation outside the table raises KeyError.
    return by_name[operation]
@tool(approval_mode="never_require")
async def fetch_data(
    table: Annotated[str, "Name of the simulated table to query."],
) -> list[dict[str, Any]]:
    """Fetch records from a named table."""
    # Artificial half-second delay before answering.
    await asyncio.sleep(0.5)
    user_rows: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "role": "admin"},
        {"id": 2, "name": "Bob", "role": "user"},
        {"id": 3, "name": "Charlie", "role": "admin"},
    ]
    product_rows: list[dict[str, Any]] = [
        {"id": 101, "name": "Widget", "price": 9.99},
        {"id": 102, "name": "Gadget", "price": 19.99},
    ]
    simulated_tables: dict[str, list[dict[str, Any]]] = {"users": user_rows, "products": product_rows}
    # Unknown table names produce an empty list.
    return simulated_tables.get(table, [])
async def main() -> None:
    """Run the provider-owned Hyperlight CodeAct sample."""
    # 1. Register the sandbox tools on the Hyperlight-backed provider only.
    codeact = HyperlightCodeActProvider(
        tools=[compute, fetch_data],
        approval_mode="never_require",
    )

    # 2. Build the chat client, then the agent that receives the provider and
    #    the logging middleware.
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=AzureCliCredential(),
    )
    agent = Agent(
        client=client,
        name="HyperlightCodeActProviderAgent",
        instructions="You are a helpful assistant.",
        context_providers=[codeact],
        middleware=[log_function_calls],
    )

    # 3. Send one request that should go through execute_code and the provider-owned tools.
    query = (
        "Fetch all users, find admins, multiply 7*(3*2), and print the users, "
        "admins, and multiplication result. Use execute_code and call_tool(...) "
        "inside the sandbox."
    )
    banner = "=" * 60
    print(f"{_CYAN}{banner}")
    print("Hyperlight CodeAct provider sample")
    print(f"{banner}{_RESET}")
    print(f"{_CYAN}User: {query}{_RESET}")
    result = await agent.run(query)
    print(f"{_CYAN}Agent: {result.text}{_RESET}")
"""
Sample output (shape only):
============================================================
Hyperlight CodeAct provider sample
============================================================
User: Fetch all users, find admins, multiply 7*(3*2), ...
────────────────────────────────────────────────────────────
▶ execute_code
────────────────────────────────────────────────────────────
users = call_tool("fetch_data", table="users")
admins = [user for user in users if user["role"] == "admin"]
result = call_tool("compute", operation="multiply", a=7, b=6)
print("Users:", users)
print("Admins:", admins)
print("7 * 6 =", result)
────────────────────────────────────────────────────────────
stdout:
Users: [...]
Admins: [...]
7 * 6 = 42.0
(0.0xxx s)
Agent: ...
"""
# Run the sample only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,133 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import asyncio
import os
from typing import Annotated, Any, Literal
from agent_framework import Agent, tool
from agent_framework.foundry import FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from agent_framework_hyperlight import HyperlightExecuteCodeTool
"""This sample demonstrates manual static wiring of CodeAct without a provider.
Instead of using `HyperlightCodeActProvider` with `context_providers=`, this
sample creates a `HyperlightExecuteCodeTool` directly, extracts its CodeAct
instructions once, and passes both to the `Agent` constructor at build time.
This avoids the per-run provider lifecycle (`before_run` / `after_run`) and is
well-suited when the tool registry, file mounts, and network allow-list are
fixed for the agent's lifetime. The tradeoff is that dynamic tool or capability
changes between runs are not supported — any mutations to the tool would not
update the agent's instructions automatically.
"""
load_dotenv()
@tool(approval_mode="never_require")
def compute(
    operation: Annotated[
        Literal["add", "subtract", "multiply", "divide"],
        "Math operation: add, subtract, multiply, or divide.",
    ],
    a: Annotated[float, "First numeric operand."],
    b: Annotated[float, "Second numeric operand."],
) -> float:
    """Perform a math operation used by sandboxed code."""
    # All four results are computed up front, then one is selected by name.
    total = a + b
    difference = a - b
    product = a * b
    # A falsy divisor yields infinity instead of raising.
    quotient = float("inf") if not b else a / b
    by_name = {
        "add": total,
        "subtract": difference,
        "multiply": product,
        "divide": quotient,
    }
    # An operation outside the table raises KeyError.
    return by_name[operation]
@tool(approval_mode="never_require")
def fetch_data(
    table: Annotated[str, "Name of the simulated table to query."],
) -> list[dict[str, Any]]:
    """Fetch simulated records from a named table."""
    user_rows: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "role": "admin"},
        {"id": 2, "name": "Bob", "role": "user"},
        {"id": 3, "name": "Charlie", "role": "admin"},
    ]
    product_rows: list[dict[str, Any]] = [
        {"id": 101, "name": "Widget", "price": 9.99},
        {"id": 102, "name": "Gadget", "price": 19.99},
    ]
    simulated_tables: dict[str, list[dict[str, Any]]] = {"users": user_rows, "products": product_rows}
    # Unknown table names produce an empty list.
    return simulated_tables.get(table, [])
@tool(approval_mode="never_require")
def send_email(
    to: Annotated[str, "Recipient email address."],
    subject: Annotated[str, "Email subject line."],
    body: Annotated[str, "Email body text."],
) -> str:
    """Simulate sending an email (direct-only tool, not available inside the sandbox)."""
    # Nothing is sent; `body` is accepted but does not appear in the confirmation string.
    return f"Email sent to {to}: {subject}"
async def main() -> None:
    """Run the manual static-wiring sample."""
    # 1. Create the execute_code tool and register the sandbox tools on it.
    execute_code = HyperlightExecuteCodeTool(
        tools=[compute, fetch_data],
        approval_mode="never_require",
    )

    # 2. Build the CodeAct instructions once. tools_visible_to_model=False tells
    #    the instructions builder that the sandbox tools are not in the agent's
    #    direct tool list, so the model must use call_tool(...) inside execute_code.
    codeact_instructions = execute_code.build_instructions(tools_visible_to_model=False)
    system_prompt = f"You are a helpful assistant.\n\n{codeact_instructions}"

    # 3. Wire everything at construction time.
    #    - send_email is a direct-only tool (not available inside the sandbox).
    #    - execute_code carries the sandbox tools (compute, fetch_data) via call_tool.
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=AzureCliCredential(),
    )
    agent = Agent(
        client=client,
        name="ManualWiringAgent",
        instructions=system_prompt,
        tools=[send_email, execute_code],
    )

    # 4. Send a request that exercises both the sandbox and the direct tool.
    banner = "=" * 60
    print(banner)
    print("Manual static-wiring CodeAct sample")
    print(banner)
    query = (
        "Fetch all users, find admins, multiply 6*7, and print the users, admins, "
        "and multiplication result. Use one execute_code call. "
        "Then send an email to admin@example.com summarising the results."
    )
    print(f"User: {query}")
    result = await agent.run(query)
    print(f"Agent: {result.text}")
"""
Sample output (shape only):
============================================================
Manual static-wiring CodeAct sample
============================================================
User: Fetch all users, find admins, multiply 6*7, ...
Agent: ...
"""
# Run the sample only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,110 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import asyncio
import os
from typing import Annotated, Any, Literal
from agent_framework import Agent, tool
from agent_framework.foundry import FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from agent_framework_hyperlight import HyperlightExecuteCodeTool
"""This sample demonstrates the standalone Hyperlight execute_code tool.
The sample adds `HyperlightExecuteCodeTool` directly to the agent. The tool's
own description advertises `call_tool(...)`, the registered sandbox tools, and
the current capability configuration, so no extra CodeAct-specific agent
instructions are required.
"""
load_dotenv()
@tool(approval_mode="never_require")
def compute(
    operation: Annotated[
        Literal["add", "subtract", "multiply", "divide"],
        "Math operation: add, subtract, multiply, or divide.",
    ],
    a: Annotated[float, "First numeric operand."],
    b: Annotated[float, "Second numeric operand."],
) -> float:
    """Perform a math operation used by sandboxed code."""
    # All four results are computed up front, then one is selected by name.
    total = a + b
    difference = a - b
    product = a * b
    # A falsy divisor yields infinity instead of raising.
    quotient = float("inf") if not b else a / b
    by_name = {
        "add": total,
        "subtract": difference,
        "multiply": product,
        "divide": quotient,
    }
    # An operation outside the table raises KeyError.
    return by_name[operation]
@tool(approval_mode="never_require")
def fetch_data(
    table: Annotated[str, "Name of the simulated table to query."],
) -> list[dict[str, Any]]:
    """Fetch simulated records from a named table."""
    user_rows: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "role": "admin"},
        {"id": 2, "name": "Bob", "role": "user"},
        {"id": 3, "name": "Charlie", "role": "admin"},
    ]
    product_rows: list[dict[str, Any]] = [
        {"id": 101, "name": "Widget", "price": 9.99},
        {"id": 102, "name": "Gadget", "price": 19.99},
    ]
    simulated_tables: dict[str, list[dict[str, Any]]] = {"users": user_rows, "products": product_rows}
    # Unknown table names produce an empty list.
    return simulated_tables.get(table, [])
async def main() -> None:
    """Run the standalone execute_code sample."""
    # 1. Create the packaged execute_code tool and register the sandbox tools on it.
    execute_code = HyperlightExecuteCodeTool(
        tools=[compute, fetch_data],
        approval_mode="never_require",
    )

    # 2. Build the chat client, then an agent whose only tool is execute_code.
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=AzureCliCredential(),
    )
    agent = Agent(
        client=client,
        name="HyperlightExecuteCodeToolAgent",
        instructions="You are a helpful assistant.",
        tools=execute_code,
    )

    # 3. Send one request through the direct-tool surface.
    banner = "=" * 60
    print(banner)
    print("Hyperlight execute_code tool sample")
    print(banner)
    query = (
        "Fetch all users, find admins, multiply 6*7, and print the users, admins, "
        "and multiplication result. Use one execute_code call."
    )
    print(f"User: {query}")
    result = await agent.run(query)
    print(f"Agent: {result.text}")
"""
Sample output (shape only):
============================================================
Hyperlight execute_code tool sample
============================================================
User: Fetch all users, find admins, multiply 6*7, ...
Agent: ...
"""
# Run the sample only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,939 @@
# Copyright (c) Microsoft. All rights reserved.
from __future__ import annotations
import asyncio
import importlib.metadata
import importlib.util
import inspect
import json
import sys
import threading
import time
from collections.abc import Awaitable, Callable, Mapping, MutableSequence
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import pytest
from agent_framework import (
Agent,
BaseChatClient,
ChatResponse,
ChatResponseUpdate,
Content,
FunctionInvocationLayer,
FunctionTool,
Message,
ResponseStream,
tool,
)
from agent_framework_hyperlight import AllowedDomain, FileMount, HyperlightCodeActProvider, HyperlightExecuteCodeTool
from agent_framework_hyperlight import _execute_code_tool as execute_code_module
def _hyperlight_integration_static_skip_reason() -> str | None:
    """Return why Hyperlight integration tests cannot run here, without starting a sandbox.

    Checks, in order: interpreter version, platform, the two importable modules,
    and the installed backend distribution. Returns ``None`` when every check passes.
    """
    if sys.version_info >= (3, 14):
        return (
            "Hyperlight integration tests require Python < 3.14 because hyperlight-sandbox-backend-wasm is unsupported."
        )
    if sys.platform not in {"linux", "win32"}:
        return "Hyperlight integration tests require Linux or Windows runners."

    required_modules = (
        ("hyperlight_sandbox", "hyperlight-sandbox is not installed."),
        ("python_guest", "hyperlight-sandbox-python-guest is not installed."),
    )
    for module_name, message in required_modules:
        if importlib.util.find_spec(module_name) is None:
            return message

    # The backend is looked up as an installed distribution rather than as a module.
    try:
        importlib.metadata.version("hyperlight-sandbox-backend-wasm")
    except importlib.metadata.PackageNotFoundError:
        return "hyperlight-sandbox-backend-wasm is not installed."
    return None
def _hyperlight_integration_runtime_skip_reason() -> str | None:
    """Return a skip reason after also trying to start a real sandbox.

    The static checks run first. If they pass, a sandbox is constructed and asked
    to run ``"None"``; a ``RuntimeError`` whose message mentions a missing
    hypervisor becomes a skip reason. Any other ``RuntimeError`` is swallowed and
    ``None`` is returned.
    """
    static_reason = _hyperlight_integration_static_skip_reason()
    if static_reason is not None:
        return static_reason

    try:
        probe = execute_code_module._load_sandbox_class()(
            backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND,
            module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE,
        )
        probe.run("None")
    except RuntimeError as exc:
        # Case-insensitive match on the error text.
        if "no hypervisor was found for sandbox" in str(exc).lower():
            return "Hyperlight integration tests require a runner with a working Hyperlight hypervisor."
    return None
def _skip_if_hyperlight_integration_runtime_disabled() -> None:
    """Skip the calling test when the runtime probe reports a reason."""
    reason = _hyperlight_integration_runtime_skip_reason()
    if reason is None:
        return
    pytest.skip(reason)
# Marker for integration tests: skip when the static checks report a reason.
# The walrus binds `reason` at module level so the same text can be reused as
# the skip message; the fallback string only applies when no reason was produced.
skip_if_hyperlight_integration_tests_disabled = pytest.mark.skipif(
    (reason := _hyperlight_integration_static_skip_reason()) is not None,
    reason=reason or "Hyperlight integration tests are disabled.",
)
@pytest.fixture(scope="module")
def shared_sandbox():
    """Module-scoped sandbox plus a snapshot of its clean state.

    The sandbox is started once with ``run("None")`` and snapshotted; tests reach
    it through ``restored_sandbox``, which restores that snapshot first. Skips
    when the runtime probe reports a reason.
    """
    _skip_if_hyperlight_integration_runtime_disabled()

    sandbox = execute_code_module._load_sandbox_class()(
        backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND,
        module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE,
    )
    sandbox.run("None")
    clean_snapshot = sandbox.snapshot()
    yield sandbox, clean_snapshot
@pytest.fixture
def restored_sandbox(shared_sandbox):
    """Return the shared sandbox after restoring it to its clean snapshot."""
    sandbox, clean_snapshot = shared_sandbox
    sandbox.restore(clean_snapshot)
    return sandbox
@pytest.fixture
def fresh_sandbox():
    """Per-test sandbox for tests that change the sandbox configuration.

    The sandbox is handed over without a warm-up run: call ``sandbox.run("None")``
    after registering tools and domains, then snapshot/restore before executing
    test code. Skips when the runtime probe reports a reason.
    """
    _skip_if_hyperlight_integration_runtime_disabled()

    sandbox = execute_code_module._load_sandbox_class()(
        backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND,
        module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE,
        temp_output=True,
    )
    yield sandbox
# Test tool that never requires approval; returns the sum of its arguments.
@tool(approval_mode="never_require")
def compute(a: int, b: int) -> int:
    return a + b
# Test tool that always requires approval; returns the product of its arguments.
@tool(approval_mode="always_require")
def dangerous_compute(a: int, b: int) -> int:
    return a * b
# Registered under the same tool name as `compute` but with always-require
# approval; used to test replacement by name. Returns the difference.
@tool(name="compute", approval_mode="always_require")
def replacement_compute(a: int, b: int) -> int:
    return a - b
@dataclass(slots=True)
class _FakeResult:
    """Result object returned by the fake sandboxes' ``run`` method."""

    # Whether the simulated run succeeded.
    success: bool
    # Captured output streams; both default to empty.
    stdout: str = ""
    stderr: str = ""
def _run_in_thread(callback: Callable[[], Any]) -> Any:
    """Run ``callback`` on a new thread, wait for it, and return its result.

    Any exception raised by the callback (including ``BaseException``
    subclasses) is captured on the worker thread and re-raised in the caller.
    """
    outcome: dict[str, Any] = {}

    def _target() -> None:
        try:
            outcome["value"] = callback()
        except BaseException as exc:
            outcome["error"] = exc

    worker = threading.Thread(target=_target)
    worker.start()
    worker.join()

    if "error" in outcome:
        raise outcome["error"]
    return outcome.get("value")
class _FakeSandbox:
    """In-memory stand-in for the sandbox class used by the unit tests.

    Every constructed instance (including subclass instances) is appended to the
    class-level ``instances`` list so tests can inspect what was created.
    ``run`` recognises a handful of fixed code strings and fails for anything else.
    """

    instances: list[_FakeSandbox] = []

    def __init__(
        self,
        *,
        input_dir: str | None = None,
        output_dir: str | None = None,
        temp_output: bool = False,
        backend: str = "wasm",
        module: str | None = None,
        module_path: str | None = None,
        heap_size: str | None = None,
        stack_size: str | None = None,
    ) -> None:
        """Record the directories; the remaining keyword arguments are accepted and ignored."""
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.registered_tools: dict[str, Any] = {}
        self.allowed_domains: list[tuple[str, list[str] | None]] = []
        self.restore_calls: list[Any] = []
        self.output_files: list[str] = []
        _FakeSandbox.instances.append(self)

    def register_tool(self, name_or_tool: Any, callback: Any | None = None) -> None:
        """Store ``callback`` under the stringified name; a missing callback is a test failure."""
        if callback is None:
            raise AssertionError("Expected callback registration for sandbox tools.")
        self.registered_tools[str(name_or_tool)] = callback

    def allow_domain(self, target: str, methods: list[str] | None = None) -> None:
        """Record an allowed-domain registration."""
        self.allowed_domains.append((target, methods))

    def _invoke_tool(self, name: str, **kwargs: Any) -> Any:
        """Call a registered tool; coroutines and awaitables are driven on a helper thread."""
        tool_callback = self.registered_tools[name]
        if inspect.iscoroutinefunction(tool_callback):
            return _run_in_thread(lambda: asyncio.run(tool_callback(**kwargs)))

        outcome = tool_callback(**kwargs)
        if not inspect.isawaitable(outcome):
            return outcome
        return _run_in_thread(lambda: asyncio.run(outcome))

    def run(self, code: str) -> _FakeResult:
        """Simulate execution of the few code strings the tests use."""
        # Warm-up run.
        if code == "None":
            return _FakeResult(success=True)

        # Simulate code that writes an artifact into the output directory.
        if code == "create-output":
            if self.output_dir is None:
                raise AssertionError("Expected output directory for create-output test.")
            artifact = Path(self.output_dir, "report.txt")
            artifact.write_text("artifact", encoding="utf-8")
            self.output_files = ["report.txt"]
            return _FakeResult(success=True, stdout="done\n")

        # Simulate code that calls the registered "compute" tool.
        if 'call_tool("compute", a=20, b=22)' in code:
            total = self._invoke_tool("compute", a=20, b=22)
            return _FakeResult(success=True, stdout=f"{total}\n")

        # Anything else is reported as a failed run.
        return _FakeResult(success=False, stderr="sandbox boom")

    def snapshot(self) -> str:
        """Return a constant snapshot token."""
        return "snapshot"

    def restore(self, snapshot: Any) -> None:
        """Record the snapshot that was restored."""
        self.restore_calls.append(snapshot)

    def get_output_files(self) -> list[str]:
        """Return a copy of the recorded output file names."""
        return list(self.output_files)
class _FakeRuntime:
    """Runtime stand-in that records every ``execute`` call and returns a fixed text item."""

    def __init__(self) -> None:
        # (config, code) pairs in call order.
        self.calls: list[tuple[Any, str]] = []

    def execute(self, *, config: Any, code: str) -> list[Content]:
        """Record the call and answer with a single ``"ok"`` text content."""
        recorded = (config, code)
        self.calls.append(recorded)
        return [Content.from_text("ok")]
class _FakeSandboxWithoutOutputListing(_FakeSandbox):
    """Fake sandbox whose output listing is always empty, whatever was written."""

    def get_output_files(self) -> list[str]:
        """Report no output files."""
        no_files: list[str] = []
        return no_files
class _FakeSandboxWithDelayedUnlistedOutput(_FakeSandboxWithoutOutputListing):
    """Fake sandbox that writes its artifact shortly after ``run`` has returned.

    The started writer threads are collected in the class-level ``writer_threads`` list.
    """

    writer_threads: list[threading.Thread] = []

    def run(self, code: str) -> _FakeResult:
        """Schedule a delayed artifact write for the marker code; defer to the parent otherwise."""
        marker = 'Path("/output/report.txt").write_text("artifact", encoding="utf-8")'
        if marker not in code:
            return super().run(code)

        if self.output_dir is None:
            raise AssertionError("Expected output directory for delayed output test.")

        def _write_file() -> None:
            # The file appears only after a short delay, i.e. after run() has returned.
            time.sleep(0.15)
            Path(self.output_dir, "report.txt").write_text("artifact", encoding="utf-8")

        writer = threading.Thread(target=_write_file)
        writer.start()
        self.writer_threads.append(writer)
        return _FakeResult(success=True)
class _FakeSessionContext:
    """Minimal session-context double that records instruction and tool extensions."""

    def __init__(self, *, tools: list[Any] | None = None) -> None:
        # `options` carries a "tools" entry only when tools were supplied.
        self.options: dict[str, Any] = {} if tools is None else {"tools": tools}
        self.instructions: list[tuple[str, str]] = []
        self.tools: list[tuple[str, list[Any]]] = []

    def extend_instructions(self, source_id: str, instructions: str) -> None:
        """Record an instruction extension together with its source id."""
        entry = (source_id, instructions)
        self.instructions.append(entry)

    def extend_tools(self, source_id: str, tools: list[Any]) -> None:
        """Record a tool extension together with its source id."""
        entry = (source_id, tools)
        self.tools.append(entry)
def _extract_text_output(function_result: Content) -> str:
    """Return the text produced by an ``execute_code`` function result.

    The first text item with a non-``None`` text wins; otherwise a truthy
    ``result`` attribute is returned. Fails with ``AssertionError`` when the
    content is not a function result, carries an exception, or has no text.
    """
    assert function_result.type == "function_result"
    assert function_result.exception is None, (
        f"execute_code raised {function_result.exception!r} with items={function_result.items!r}"
    )

    for item in function_result.items or []:
        if item.type == "text" and item.text is not None:
            return item.text

    if function_result.result:
        return function_result.result
    raise AssertionError(f"Expected text output from execute_code, got {function_result.items!r}")
class _FakeCodeActChatClient(FunctionInvocationLayer[Any], BaseChatClient[Any]):
    """Scripted chat client that drives one ``execute_code`` round trip.

    The first response requests an ``execute_code`` function call. Every later
    response checks the single function result found in the messages and
    returns a final assistant message.
    """

    def __init__(self) -> None:
        FunctionInvocationLayer.__init__(self)
        BaseChatClient.__init__(self)
        # Number of responses produced so far; selects the scripted reply.
        self.call_count = 0

    def _inner_get_response(
        self,
        *,
        messages: MutableSequence[Message],
        stream: bool,
        options: Mapping[str, Any],
        **kwargs: Any,
    ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
        """Return an awaitable producing the next scripted response; streaming is rejected."""
        if stream:
            raise AssertionError("Streaming is not used in this integration test.")

        async def _get_response() -> ChatResponse:
            self.call_count += 1
            if self.call_count == 1:
                # First turn: ask for execute_code with code that calls the
                # `compute` tool from inside the sandbox and prints the total.
                return ChatResponse(
                    messages=Message(
                        role="assistant",
                        contents=[
                            Content.from_function_call(
                                call_id="execute_code_call",
                                name="execute_code",
                                arguments={
                                    "code": 'total = call_tool("compute", a=20, b=22)\nprint(total)',
                                },
                            )
                        ],
                    )
                )
            # Later turns: exactly one function result must be present, it must
            # answer the call above, and its text output must be "42\n".
            function_results = [
                content for message in messages for content in message.contents if content.type == "function_result"
            ]
            assert len(function_results) == 1
            result_content = function_results[0]
            assert result_content.call_id == "execute_code_call"
            assert _extract_text_output(result_content) == "42\n"
            return ChatResponse(messages=Message(role="assistant", contents=["The sandbox returned 42."]))

        return _get_response()
def test_execute_code_tool_updates_approval_with_managed_tools() -> None:
    """Adding an always-require tool switches execute_code to always-require."""
    sandbox_tool = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime())
    assert sandbox_tool.approval_mode == "never_require"

    sandbox_tool.add_tools([dangerous_compute])
    assert sandbox_tool.approval_mode == "always_require"
def test_execute_code_tool_replaces_tools_with_the_same_name() -> None:
    """A tool added under an existing name replaces the earlier one and its approval mode applies."""
    sandbox_tool = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime())
    sandbox_tool.add_tools(replacement_compute)

    registered = sandbox_tool.get_tools()
    assert len(registered) == 1
    assert registered[0] is replacement_compute
    assert sandbox_tool.approval_mode == "always_require"
def test_execute_code_tool_accepts_string_and_tuple_file_mounts_without_mode_flags(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A bare relative path and a ``(path, target)`` tuple both become ``FileMount`` entries under ``/input``."""
    relative_source = tmp_path / "notes.txt"
    relative_source.write_text("hello", encoding="utf-8")
    tuple_source = tmp_path / "data.json"
    tuple_source.write_text('{"hello": "world"}', encoding="utf-8")
    # The "notes.txt" shorthand is given relative to the working directory.
    monkeypatch.chdir(tmp_path)

    sandbox_tool = HyperlightExecuteCodeTool(_registry=_FakeRuntime())
    sandbox_tool.add_file_mounts("notes.txt")
    sandbox_tool.add_file_mounts((tuple_source, "data/data.json"))

    mounts = sandbox_tool.get_file_mounts()
    assert mounts == [
        FileMount(relative_source.resolve(), "/input/notes.txt"),
        FileMount(tuple_source.resolve(), "/input/data/data.json"),
    ]
async def test_execute_code_tool_populates_input_dir_with_workspace_and_file_mounts(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sandbox input directory contains both the workspace files and the mounted file."""
    _FakeSandbox.instances.clear()
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox)

    workspace = tmp_path / "workspace"
    workspace.mkdir()
    (workspace / "notes.txt").write_text("workspace note", encoding="utf-8")
    extra_file = tmp_path / "mounted.txt"
    extra_file.write_text("hello from mount", encoding="utf-8")

    sandbox_tool = HyperlightExecuteCodeTool(
        workspace_root=workspace,
        file_mounts=[FileMount(extra_file, "data/input.txt")],
    )
    result = await sandbox_tool.invoke(arguments={"code": "None"})

    assert result[0].type == "text"
    created_sandbox = _FakeSandbox.instances[0]
    assert created_sandbox.input_dir is not None
    input_root = Path(created_sandbox.input_dir)
    assert (input_root / "notes.txt").read_text(encoding="utf-8") == "workspace note"
    assert (input_root / "data" / "input.txt").read_text(encoding="utf-8") == "hello from mount"
def test_execute_code_tool_allowed_domains_use_structured_entries_and_replace_by_target() -> None:
    """A second ``add_allowed_domains`` call for the same targets replaces the earlier entries."""
    tool = HyperlightExecuteCodeTool(_registry=_FakeRuntime())

    # First registration: a URL string and a (target, method) tuple.
    tool.add_allowed_domains(["https://api.example.com/v1", ("github.com", "get")])

    # Second registration for the same two targets with different method sets.
    replacements = [
        AllowedDomain("api.example.com", ("post", "get")),
        ("github.com", ["head", "get"]),
    ]
    tool.add_allowed_domains(replacements)

    domains = tool.get_allowed_domains()

    # One entry per target, carrying the second call's methods in upper case.
    assert domains == [
        AllowedDomain("api.example.com", ("GET", "POST")),
        AllowedDomain("github.com", ("GET", "HEAD")),
    ]
def test_execute_code_tool_description_contains_call_tool_guidance(tmp_path: Path) -> None:
    """The tool description mentions call_tool, the tool name, mount paths and allowed domains."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    (workspace / "notes.txt").write_text("hello", encoding="utf-8")
    data_file = tmp_path / "data.json"
    data_file.write_text('{"hello": "world"}', encoding="utf-8")

    tool = HyperlightExecuteCodeTool(
        tools=[compute],
        workspace_root=workspace,
        file_mounts=[FileMount(str(data_file), "data/data.json")],
        allowed_domains=[AllowedDomain("https://api.example.com/v1", ("get", "post")), "github.com"],
        _registry=_FakeRuntime(),
    )

    text = tool.description

    # Checked in the same order as the individual assertions they replace.
    expected_fragments = (
        "call_tool(name, **kwargs)",
        "compute",
        "/input/data/data.json",
        "/output",
        "api.example.com",
        "GET, POST",
        "github.com",
    )
    for fragment in expected_fragments:
        assert fragment in text
async def test_execute_code_tool_executes_with_structured_content(monkeypatch: pytest.MonkeyPatch) -> None:
    """Invoking the tool yields text plus data content and configures the sandbox."""
    _FakeSandbox.instances.clear()
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox)

    tool = HyperlightExecuteCodeTool(
        tools=[compute],
        file_mounts=[FileMount(Path(__file__), "fixtures/source.py")],
        allowed_domains=[("api.example.com", "get")],
    )
    outputs = await tool.invoke(arguments={"code": "create-output"})

    # The first item is the text output; a data item must also be present.
    first = outputs[0]
    assert first.type == "text"
    assert first.text == "done\n"
    assert any(item.type == "data" for item in outputs)

    # The fake sandbox received the normalised domain and the registered tool.
    sandbox = _FakeSandbox.instances[0]
    assert sandbox.allowed_domains == [("api.example.com", ["GET"])]
    assert "compute" in sandbox.registered_tools
async def test_execute_code_tool_collects_output_files_without_backend_listing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Output files are still returned when using ``_FakeSandboxWithoutOutputListing``."""
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandboxWithoutOutputListing)

    tool = HyperlightExecuteCodeTool(
        file_mounts=[FileMount(Path(__file__), "fixtures/source.py")],
    )
    outputs = await tool.invoke(arguments={"code": "create-output"})

    assert outputs[0].type == "text"
    report_found = any(
        item.type == "data" and item.additional_properties["path"] == "/output/report.txt" for item in outputs
    )
    assert report_found
async def test_execute_code_tool_waits_for_unlisted_output_files_to_appear(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The report file is returned when using ``_FakeSandboxWithDelayedUnlistedOutput``."""
    sandbox_cls = _FakeSandboxWithDelayedUnlistedOutput
    sandbox_cls.writer_threads.clear()
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandboxWithDelayedUnlistedOutput)

    tool = HyperlightExecuteCodeTool(
        file_mounts=[FileMount(Path(__file__), "fixtures/source.py")],
    )
    outputs = await tool.invoke(
        arguments={"code": 'Path("/output/report.txt").write_text("artifact", encoding="utf-8")'}
    )

    # Join the fake's writer threads before asserting on the collected output.
    for thread in sandbox_cls.writer_threads:
        thread.join()

    report_found = any(
        item.type == "data" and item.additional_properties["path"] == "/output/report.txt" for item in outputs
    )
    assert report_found
async def test_execute_code_tool_failure_returns_error_content(monkeypatch: pytest.MonkeyPatch) -> None:
    """Running the ``"fail"`` code with the fake sandbox returns error content."""
    _FakeSandbox.instances.clear()
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox)

    tool = HyperlightExecuteCodeTool()
    outputs = await tool.invoke(arguments={"code": "fail"})

    error_item = outputs[0]
    assert error_item.type == "error"
    assert error_item.error_details == "sandbox boom"
async def test_execute_code_tool_retries_allowed_domains_with_urls_when_backend_rejects_host_targets(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A backend that rejects bare host targets triggers a second sandbox using http/https URLs."""

    class _FakeStrictNetworkSandbox:
        """Sandbox stand-in that refuses to run ``"None"`` while any target lacks a URL scheme."""

        instances: list[_FakeStrictNetworkSandbox] = []

        def __init__(
            self,
            *,
            input_dir: str | None = None,
            output_dir: str | None = None,
            backend: str = "wasm",
            module: str | None = None,
            module_path: str | None = None,
        ) -> None:
            # Constructor arguments are accepted but not used by this fake.
            del input_dir, output_dir, backend, module, module_path
            self.allowed_domains: list[tuple[str, list[str] | None]] = []
            _FakeStrictNetworkSandbox.instances.append(self)

        def register_tool(self, name_or_tool: Any, callback: Any | None = None) -> None:
            del name_or_tool, callback

        def allow_domain(self, target: str, methods: list[str] | None = None) -> None:
            # Record every permission so the test can inspect what was requested.
            self.allowed_domains.append((target, methods))

        def run(self, code: str) -> _FakeResult:
            if code == "None":
                has_schemeless_target = any("://" not in target for target, _ in self.allowed_domains)
                if has_schemeless_target:
                    raise RuntimeError("invalid URL for network permission: ")
            return _FakeResult(success=True)

        def snapshot(self) -> str:
            return "snapshot"

        def restore(self, snapshot: Any) -> None:
            del snapshot

    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeStrictNetworkSandbox)

    tool = HyperlightExecuteCodeTool(allowed_domains=[("127.0.0.1:8080", "get")])
    outputs = await tool.invoke(arguments={"code": "None"})

    assert outputs[0].type == "text"

    created = _FakeStrictNetworkSandbox.instances
    assert len(created) == 2
    # First attempt used the bare host target ...
    assert created[0].allowed_domains == [("127.0.0.1:8080", ["GET"])]
    # ... and the second attempt used both URL forms of that target.
    assert created[1].allowed_domains == [
        ("http://127.0.0.1:8080", ["GET"]),
        ("https://127.0.0.1:8080", ["GET"]),
    ]
def test_hyperlight_integration_runtime_skip_reason_reports_missing_hypervisor(monkeypatch: pytest.MonkeyPatch) -> None:
    """The skip-reason helper reports a missing hypervisor when the sandbox fails with that error."""

    class _FakeNoHypervisorSandbox:
        """Sandbox stand-in whose ``run`` always raises the "No Hypervisor" error."""

        def __init__(
            self,
            *,
            input_dir: str | None = None,
            output_dir: str | None = None,
            backend: str = "wasm",
            module: str | None = None,
            module_path: str | None = None,
        ) -> None:
            del input_dir, output_dir, backend, module, module_path

        def run(self, code: str) -> _FakeResult:
            del code
            raise RuntimeError("failed to build ProtoWasmSandbox: No Hypervisor was found for Sandbox")

    # Capture the real lookup before it is patched so other modules still resolve.
    real_find_spec = importlib.util.find_spec

    def _fake_find_spec(name: str) -> object | None:
        # Report the two Hyperlight modules as present; defer everything else.
        if name not in {"hyperlight_sandbox", "python_guest"}:
            return real_find_spec(name)
        return object()

    # Patch interpreter version, platform, module lookup and package version.
    monkeypatch.setattr(sys, "version_info", (3, 13, 0))
    monkeypatch.setattr(sys, "platform", "linux")
    monkeypatch.setattr(importlib.util, "find_spec", _fake_find_spec)
    monkeypatch.setattr(importlib.metadata, "version", lambda _: "0.0.0")
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeNoHypervisorSandbox)

    reason = _hyperlight_integration_runtime_skip_reason()

    assert reason == (
        "Hyperlight integration tests require a runner with a working Hyperlight hypervisor."
    )
async def test_provider_injects_run_scoped_execute_code_tool() -> None:
    """``before_run`` adds one execute-code tool that later provider changes do not affect."""
    provider = HyperlightCodeActProvider(tools=[compute], _registry=_FakeRuntime())
    context = _FakeSessionContext(tools=[dangerous_compute])
    state: dict[str, Any] = {}

    await provider.before_run(agent=object(), session=None, context=context, state=state)

    # The tools already present in the context options are left alone.
    assert context.options["tools"] == [dangerous_compute]
    assert len(context.instructions) == 1
    assert len(context.tools) == 1

    injected_tool = context.tools[0][1][0]
    assert isinstance(injected_tool, HyperlightExecuteCodeTool)
    assert injected_tool.approval_mode == "never_require"
    assert [tool_obj.name for tool_obj in injected_tool.get_tools()] == ["compute"]

    # The injected instructions do not mention tool names or filesystem capabilities.
    instruction_text = context.instructions[0][1]
    for absent in ("dangerous_compute", "compute", "Filesystem capabilities:"):
        assert absent not in instruction_text

    provider_state = state[provider.source_id]
    assert provider_state["tool_names"] == ["compute"]
    assert provider_state["approval_mode"] == "never_require"
    # Raises if the state contains anything that is not JSON-serialisable.
    json.dumps(state)

    # Removing the tool from the provider does not change the injected tool.
    provider.remove_tool("compute")
    assert [tool_obj.name for tool_obj in injected_tool.get_tools()] == ["compute"]
async def test_agent_runs_hyperlight_codeact_end_to_end_with_fake_sandbox(monkeypatch: pytest.MonkeyPatch) -> None:
    """An agent with the CodeAct provider completes a run against the fake sandbox."""
    _FakeSandbox.instances.clear()
    monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox)

    chat_client = _FakeCodeActChatClient()
    agent = Agent(
        client=chat_client,
        context_providers=[HyperlightCodeActProvider(tools=[compute])],
    )

    reply = await agent.run("Use the sandbox to add 20 and 22.")

    assert reply.text == "The sandbox returned 42."
    assert chat_client.call_count == 2
    # Exactly one sandbox was created, and it had the host tool registered.
    sandboxes = _FakeSandbox.instances
    assert len(sandboxes) == 1
    assert "compute" in sandboxes[0].registered_tools
@skip_if_hyperlight_integration_tests_disabled
async def test_agent_runs_hyperlight_codeact_end_to_end_with_real_sandbox() -> None:
    """An agent with the CodeAct provider completes a run without the sandbox class patched."""
    _skip_if_hyperlight_integration_runtime_disabled()

    chat_client = _FakeCodeActChatClient()
    agent = Agent(
        client=chat_client,
        context_providers=[HyperlightCodeActProvider(tools=[compute])],
    )

    reply = await agent.run("Use the sandbox to add 20 and 22.")

    assert reply.text == "The sandbox returned 42."
    assert chat_client.call_count == 2
@skip_if_hyperlight_integration_tests_disabled
async def test_provider_run_tool_writes_files_with_real_sandbox(tmp_path: Path) -> None:
    """Run a script that writes ``/output/result.txt`` and check the returned content.

    The embedded script is Python source, so its nested blocks must be indented;
    with the nesting flattened the script fails with an IndentationError before
    it writes anything.
    """
    _skip_if_hyperlight_integration_runtime_disabled()
    workspace_root = tmp_path / "workspace"
    workspace_root.mkdir()
    provider = HyperlightCodeActProvider(workspace_root=workspace_root)
    context = _FakeSessionContext()
    state: dict[str, Any] = {}
    await provider.before_run(agent=object(), session=None, context=context, state=state)
    run_tool = context.tools[0][1][0]
    assert isinstance(run_tool, HyperlightExecuteCodeTool)

    result = await run_tool.invoke(
        arguments={
            "code": (
                'payload = "hello from sandbox"\n'
                "output_path = None\n"
                'for candidate in ("/output/result.txt",):\n'
                "    try:\n"
                '        with open(candidate, "w", encoding="utf-8") as f:\n'
                "            f.write(payload)\n"
                "    except OSError:\n"
                "        continue\n"
                "    output_path = candidate\n"
                "    break\n"
                'assert output_path is not None, "output path unavailable"\n'
                'print("validated")\n'
            )
        }
    )

    outputs = result
    # Surface any sandbox error in the assertion message.
    error_outputs = [
        f"{item.message}: {item.error_details}"
        for item in outputs
        if item.type == "error" and item.error_details is not None
    ]
    assert not error_outputs, error_outputs

    # Text and data outputs are only checked when present.
    text_output = next((item for item in outputs if item.type == "text" and item.text is not None), None)
    if text_output is not None:
        assert text_output.text == "validated\n"

    file_output = next((item for item in outputs if item.type == "data"), None)
    if file_output is not None:
        assert file_output.uri is not None and file_output.uri.startswith("data:")
        assert file_output.additional_properties["path"] in {"/output/result.txt", "/output/output/result.txt"}
@pytest.mark.integration
@skip_if_hyperlight_integration_tests_disabled
@pytest.mark.skipif(sys.platform == "win32", reason="Hyperlight WASM sandbox lacks encodings.idna on Windows")
async def test_provider_run_tool_pings_bing_with_real_sandbox() -> None:
    """Run a script that opens a TCP connection to bing.com with that domain allowed.

    The embedded script is Python source; its ``for``/``try``/``finally``/``else``
    nesting must be indented, otherwise it fails with an IndentationError before
    any network call is made.
    """
    _skip_if_hyperlight_integration_runtime_disabled()
    provider = HyperlightCodeActProvider()
    provider.add_allowed_domains("bing.com")
    context = _FakeSessionContext()
    state: dict[str, Any] = {}
    await provider.before_run(agent=object(), session=None, context=context, state=state)
    run_tool = context.tools[0][1][0]
    assert isinstance(run_tool, HyperlightExecuteCodeTool)

    result = await run_tool.invoke(
        arguments={
            "code": (
                "import _socket\n\n"
                'addresses = _socket.getaddrinfo("bing.com", 80, _socket.AF_INET, _socket.SOCK_STREAM)\n'
                'assert addresses, "bing.com did not resolve"\n'
                "last_error = None\n"
                "for family, socktype, proto, _, sockaddr in addresses:\n"
                "    connection = None\n"
                "    try:\n"
                "        connection = _socket.socket(family, socktype, proto)\n"
                "        connection.settimeout(10)\n"
                "        connection.connect(sockaddr)\n"
                '        print("pinged bing.com")\n'
                "        break\n"
                "    except OSError as exc:\n"
                "        last_error = exc\n"
                "    finally:\n"
                "        if connection is not None:\n"
                "            try:\n"
                "                connection.close()\n"
                "            except OSError:\n"
                "                pass\n"
                "else:\n"
                '    raise last_error or RuntimeError("unable to reach bing.com")\n'
            )
        }
    )

    outputs = result
    # Surface any sandbox error in the assertion message.
    error_outputs = [
        f"{item.message}: {item.error_details}"
        for item in outputs
        if item.type == "error" and item.error_details is not None
    ]
    assert not error_outputs, error_outputs

    # Text output is only checked when present.
    text_output = next((item for item in outputs if item.type == "text" and item.text is not None), None)
    if text_output is not None:
        assert text_output.text == "pinged bing.com\n"
# ---------------------------------------------------------------------------
# Real-sandbox tests using shared (long-lived) fixture
# ---------------------------------------------------------------------------
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_runs_simple_code(restored_sandbox) -> None:
    """A single ``print`` call succeeds and its text shows up on stdout."""
    outcome = restored_sandbox.run('print("hello")')

    assert outcome.success
    assert "hello" in outcome.stdout
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_stdout_and_stderr_captured(restored_sandbox) -> None:
    """Text written to stdout and stderr is captured in separate result fields."""
    script = 'import sys\nprint("out")\nprint("err", file=sys.stderr)'

    outcome = restored_sandbox.run(script)

    assert outcome.success
    assert "out" in outcome.stdout
    assert "err" in outcome.stderr
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_code_failure_returns_nonzero_exit(restored_sandbox) -> None:
    """A script that raises is reported as unsuccessful, with the message on stderr."""
    outcome = restored_sandbox.run("raise ValueError('boom')")

    assert not outcome.success
    assert "boom" in outcome.stderr
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_snapshot_restore_keeps_sandbox_functional(restored_sandbox) -> None:
    """A snapshot/restore round trip leaves the sandbox able to run code."""
    # Run something first so the snapshot is taken from a used sandbox.
    before = restored_sandbox.run('print("before snapshot")')
    assert before.success

    # Snapshot the sandbox and immediately restore that snapshot.
    restored_sandbox.restore(restored_sandbox.snapshot())

    # Code still executes and produces output afterwards.
    after = restored_sandbox.run('print("after restore")')
    assert after.success
    assert "after restore" in after.stdout
# ---------------------------------------------------------------------------
# Real-sandbox tests using fresh (short-lived) fixture
# ---------------------------------------------------------------------------
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_with_tool_registration_and_execution(fresh_sandbox) -> None:
    """A synchronous host tool can be called from sandbox code through ``call_tool``."""

    def multiply(a: int, b: int) -> int:
        return a * b

    fresh_sandbox.register_tool("multiply", multiply)
    # Run a no-op, then snapshot and restore before the real call.
    fresh_sandbox.run("None")
    fresh_sandbox.restore(fresh_sandbox.snapshot())

    outcome = fresh_sandbox.run('result = call_tool("multiply", a=6, b=7)\nprint(result)')

    assert outcome.success
    assert "42" in outcome.stdout
@skip_if_hyperlight_integration_tests_disabled
async def test_sandbox_async_callback_round_trips_with_real_sandbox(fresh_sandbox) -> None:
    """The callback built by ``_make_sandbox_callback`` works when called from sandbox code."""
    wrapped_tool = FunctionTool(
        func=compute,
        name="compute",
        description="Add two numbers",
    )
    fresh_sandbox.register_tool("compute", execute_code_module._make_sandbox_callback(wrapped_tool))

    # Run a no-op, then snapshot and restore before the real call.
    fresh_sandbox.run("None")
    fresh_sandbox.restore(fresh_sandbox.snapshot())

    outcome = fresh_sandbox.run('total = call_tool("compute", a=20, b=22)\nprint(total)')

    assert outcome.success
    assert "42" in outcome.stdout
@skip_if_hyperlight_integration_tests_disabled
async def test_output_dir_cleared_between_invocations() -> None:
    """A file written by one invocation must not appear in the next invocation's output."""
    _skip_if_hyperlight_integration_runtime_disabled()

    def _is_stale_file(item: Any) -> bool:
        # Data item whose recorded path mentions stale.txt.
        return item.type == "data" and "stale.txt" in (item.additional_properties or {}).get("path", "")

    provider = HyperlightCodeActProvider(workspace_root=Path(__file__).parent)
    context = _FakeSessionContext()
    state: dict[str, Any] = {}
    await provider.before_run(agent=object(), session=None, context=context, state=state)
    run_tool = context.tools[0][1][0]
    assert isinstance(run_tool, HyperlightExecuteCodeTool)

    # Invocation 1 writes /output/stale.txt.
    first_outputs = await run_tool.invoke(
        arguments={"code": ('with open("/output/stale.txt", "w") as f:\n f.write("first")\nprint("wrote")\n')}
    )
    assert first_outputs[0].type in ("text", "data")
    assert any(_is_stale_file(item) for item in first_outputs), "First invocation should produce stale.txt"

    # Invocation 2 writes nothing, so stale.txt must not be reported again.
    second_outputs = await run_tool.invoke(arguments={"code": 'print("clean")\n'})
    leaked = [item for item in second_outputs if _is_stale_file(item)]
    assert not leaked, "Stale output file leaked into second invocation"
@skip_if_hyperlight_integration_tests_disabled
async def test_run_code_does_not_block_event_loop() -> None:
    """Verify the event loop keeps running while the registry's ``execute`` call is blocked.

    The patched ``execute`` signals the loop and then blocks on ``release``. Only a
    responsive loop can run the concurrent task that sets ``release``, so the
    return value of ``release.wait`` shows whether the loop was free.
    ``concurrent_ran`` alone cannot show this: if the loop were blocked, the wait
    would time out, the queued signal would run afterwards, and ``concurrent_ran``
    would still end up True.
    """
    _skip_if_hyperlight_integration_runtime_disabled()
    provider = HyperlightCodeActProvider()
    context = _FakeSessionContext()
    state: dict[str, Any] = {}
    await provider.before_run(agent=object(), session=None, context=context, state=state)
    run_tool = context.tools[0][1][0]
    assert isinstance(run_tool, HyperlightExecuteCodeTool)

    release = threading.Event()
    async_started = asyncio.Event()
    loop = asyncio.get_running_loop()
    original_execute = run_tool._registry.execute
    # One entry per execute call: True if `release` was set before the timeout.
    released_in_time: list[bool] = []

    def _blocking_execute(*, config, code):
        loop.call_soon_threadsafe(async_started.set)
        released_in_time.append(release.wait(timeout=10))
        return original_execute(config=config, code=code)

    run_tool._registry.execute = _blocking_execute  # type: ignore[assignment]

    concurrent_ran = False

    async def _concurrent_task():
        nonlocal concurrent_ran
        await async_started.wait()
        concurrent_ran = True
        release.set()

    code_task = asyncio.create_task(run_tool.invoke(arguments={"code": 'print("done")\n'}))
    try:
        # Bounded so the test fails instead of hanging if execute is never called.
        # The bound exceeds the 10s release timeout so a blocked loop is reported
        # by the assertions below rather than by this timeout.
        await asyncio.wait_for(_concurrent_task(), timeout=30)
    finally:
        # Never leave the patched execute waiting on the event.
        release.set()
    result = await code_task

    assert concurrent_ran, "Event loop was blocked during sandbox execution"
    assert released_in_time and all(released_in_time), "Event loop was blocked during sandbox execution"
    assert result[0].type == "text"
@@ -549,6 +549,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
chunk,
options=validated_options,
function_call_ids=function_call_ids,
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
)
else:
async for chunk in await client.responses.create(stream=True, **run_options):
@@ -556,6 +557,7 @@ class RawOpenAIChatClient( # type: ignore[misc]
chunk,
options=validated_options,
function_call_ids=function_call_ids,
seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids,
)
except Exception as ex:
self._handle_request_error(ex)
@@ -1587,6 +1589,54 @@ class RawOpenAIChatClient( # type: ignore[misc]
"""Join shell commands into a single executable command string."""
return "\n".join(command for command in commands if command).strip()
@staticmethod
def _serialize_provider_payload(value: Any) -> Any:
    """Recursively convert a provider payload into plain Python values.

    ``BaseModel`` instances are dumped in JSON mode with ``None`` fields dropped,
    mappings become dicts with string keys, and non-string sequences become
    lists. Any other value is returned unchanged.
    """
    convert = RawOpenAIChatClient._serialize_provider_payload
    if isinstance(value, BaseModel):
        return value.model_dump(mode="json", exclude_none=True)
    if isinstance(value, Mapping):
        serialized: dict[str, Any] = {}
        for key, item in value.items():  # type: ignore[reportUnknownVariableType]
            serialized[str(key)] = convert(item)
        return serialized
    # str/bytes/bytearray are sequences too, but are returned as-is.
    is_plain_sequence = isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray))
    if is_plain_sequence:
        return [convert(item) for item in value]  # type: ignore[reportUnknownVariableType]
    return value
@staticmethod
def _get_search_tool_name(item_type: str) -> str:
"""Map OpenAI search output item types to unified content tool names."""
return "web_search" if item_type == "web_search_call" else "file_search"
def _parse_search_tool_call_content(self, item: Any) -> Content:
    """Build search tool *call* content from an OpenAI search output item.

    Web search items carry their serialized ``action`` as arguments; any other
    item type carries ``{"queries": [...]}`` taken from the item's ``queries``.
    The call id is the item's ``id``, falling back to ``call_id`` and then ``""``.
    """
    kind = getattr(item, "type", "")
    identifier = getattr(item, "id", None) or getattr(item, "call_id", None) or ""
    if kind != "web_search_call":
        # A missing or None `queries` attribute becomes an empty list.
        call_arguments = {"queries": list(getattr(item, "queries", []) or [])}
    else:
        call_arguments = self._serialize_provider_payload(getattr(item, "action", None))
    return Content.from_search_tool_call(
        call_id=identifier,
        tool_name=self._get_search_tool_name(kind),
        arguments=call_arguments,
        status=getattr(item, "status", None),
        raw_representation=item,
    )
def _parse_search_tool_result_content(self, item: Any) -> Content:
    """Build search tool *result* content from an OpenAI search output item.

    Web search items report ``{"action": ...}``; any other item type reports
    ``{"results": ...}``. Both values go through ``_serialize_provider_payload``.
    The call id is the item's ``id``, falling back to ``call_id`` and then ``""``.
    """
    kind = getattr(item, "type", "")
    identifier = getattr(item, "id", None) or getattr(item, "call_id", None) or ""
    # Pick the item attribute (and result key) that holds the payload for this item type.
    payload_key = "action" if kind == "web_search_call" else "results"
    payload = {payload_key: self._serialize_provider_payload(getattr(item, payload_key, None))}
    return Content.from_search_tool_result(
        call_id=identifier,
        tool_name=self._get_search_tool_name(kind),
        result=payload,
        status=getattr(item, "status", None),
        raw_representation=item,
    )
# region Parse methods
def _parse_response_from_openai(
self,
@@ -1788,6 +1838,9 @@ class RawOpenAIChatClient( # type: ignore[misc]
raw_representation=item,
)
)
case "web_search_call" | "file_search_call":
contents.append(self._parse_search_tool_call_content(item))
contents.append(self._parse_search_tool_result_content(item))
case "mcp_approval_request": # ResponseOutputMcpApprovalRequest
contents.append(
Content.from_function_approval_request(
@@ -2377,8 +2430,19 @@ class RawOpenAIChatClient( # type: ignore[misc]
additional_properties=additional_properties_empty or None,
)
)
case "web_search_call" | "file_search_call":
contents.append(self._parse_search_tool_call_content(event_item))
case _:
logger.debug("Unparsed event of type: %s: %s", event.type, event)
case (
"response.web_search_call.in_progress"
| "response.web_search_call.searching"
| "response.web_search_call.completed"
| "response.file_search_call.in_progress"
| "response.file_search_call.searching"
| "response.file_search_call.completed"
):
pass
case "response.function_call_arguments.delta":
call_id, name = function_call_ids.get(event.output_index, (None, None))
if call_id and name:
@@ -2514,6 +2578,8 @@ class RawOpenAIChatClient( # type: ignore[misc]
raw_representation=done_item,
)
)
elif getattr(done_item, "type", None) in ("web_search_call", "file_search_call"):
contents.append(self._parse_search_tool_result_content(done_item))
case _:
logger.debug("Unparsed event of type: %s: %s", event.type, event)
@@ -7,7 +7,7 @@ import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated, Any
from unittest.mock import MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agent_framework import (
@@ -71,6 +71,35 @@ class OutputStruct(BaseModel):
weather: str | None = None
class _FakeAsyncEventStream:
def __init__(self, events: list[object]) -> None:
self._events = events
self._iterator = iter(())
def __aiter__(self) -> "_FakeAsyncEventStream":
self._iterator = iter(self._events)
return self
async def __anext__(self) -> object:
try:
return next(self._iterator)
except StopIteration as exc:
raise StopAsyncIteration from exc
class _FakeAsyncEventStreamContext(_FakeAsyncEventStream):
    """Event stream that can also be used as an ``async with`` context manager."""

    async def __aenter__(self) -> "_FakeAsyncEventStreamContext":
        # The stream itself is the value bound by ``async with ... as``.
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        traceback: object | None,
    ) -> None:
        # Nothing to clean up; returning None lets any exception propagate.
        return None
async def create_vector_store(
client: OpenAIChatClient,
) -> tuple[str, Content]:
@@ -1250,6 +1279,91 @@ def test_response_content_creation_with_function_call() -> None:
assert function_call.arguments == '{"location": "Seattle"}'
def test_parse_response_from_openai_with_web_search_call() -> None:
    """A web_search_call output item yields a search tool call followed by its result."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")

    action = {
        "type": "search",
        "query": "current weather in Seattle",
        "queries": ["current weather in Seattle"],
        "sources": [{"title": "Weather", "url": "https://weather.example"}],
    }
    search_item = MagicMock()
    search_item.type = "web_search_call"
    search_item.id = "ws_123"
    search_item.status = "completed"
    search_item.action = action

    raw_response = MagicMock()
    raw_response.output_parsed = None
    raw_response.metadata = {}
    raw_response.usage = None
    raw_response.id = "resp-web"
    raw_response.model = "test-model"
    raw_response.created_at = 1000000000
    raw_response.output = [search_item]

    parsed = client._parse_response_from_openai(raw_response, options={})  # type: ignore

    contents = parsed.messages[0].contents
    assert len(contents) == 2
    call_content, result_content = contents

    # Call content: the action dict is passed through as the arguments.
    assert call_content.type == "search_tool_call"
    assert call_content.call_id == "ws_123"
    assert call_content.tool_name == "web_search"
    assert call_content.status == "completed"
    assert call_content.arguments == search_item.action

    # Result content: the same action wrapped under the "action" key.
    assert result_content.type == "search_tool_result"
    assert result_content.call_id == "ws_123"
    assert result_content.tool_name == "web_search"
    assert result_content.status == "completed"
    assert result_content.result == {"action": search_item.action}
def test_parse_response_from_openai_with_file_search_call() -> None:
    """A file_search_call output item yields a search tool call followed by its result."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")

    search_results = [
        {
            "file_id": "file_1",
            "filename": "weather.txt",
            "score": 0.9,
            "text": "Seattle was cloudy.",
        }
    ]
    search_item = MagicMock()
    search_item.type = "file_search_call"
    search_item.id = "fs_123"
    search_item.status = "completed"
    search_item.queries = ["weather history"]
    search_item.results = search_results

    raw_response = MagicMock()
    raw_response.output_parsed = None
    raw_response.metadata = {}
    raw_response.usage = None
    raw_response.id = "resp-file"
    raw_response.model = "test-model"
    raw_response.created_at = 1000000000
    raw_response.output = [search_item]

    parsed = client._parse_response_from_openai(raw_response, options={})  # type: ignore

    contents = parsed.messages[0].contents
    assert len(contents) == 2
    call_content, result_content = contents

    # Call content: the queries are wrapped under the "queries" key.
    assert call_content.type == "search_tool_call"
    assert call_content.call_id == "fs_123"
    assert call_content.tool_name == "file_search"
    assert call_content.status == "completed"
    assert call_content.arguments == {"queries": ["weather history"]}

    # Result content: the results are wrapped under the "results" key.
    assert result_content.type == "search_tool_result"
    assert result_content.call_id == "fs_123"
    assert result_content.tool_name == "file_search"
    assert result_content.status == "completed"
    assert result_content.result == {"results": search_item.results}
def test_prepare_content_for_opentool_approval_response() -> None:
"""Test _prepare_content_for_openai with function approval response content."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -1394,6 +1508,86 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None:
assert result_content.output is not None
def test_parse_chunk_from_openai_with_web_search_call_added() -> None:
    """An output_item.added event for a web_search_call emits one search tool call content."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
    tracked_call_ids: dict[int, tuple[str, str]] = {}

    search_item = MagicMock()
    search_item.type = "web_search_call"
    search_item.id = "ws_call_123"
    search_item.status = "in_progress"
    search_item.action = {"type": "search", "query": "weather in Seattle"}

    added_event = MagicMock()
    added_event.type = "response.output_item.added"
    added_event.output_index = 0
    added_event.item = search_item

    update = client._parse_chunk_from_openai(
        added_event,
        options=ChatOptions(),
        function_call_ids=tracked_call_ids,
    )

    assert len(update.contents) == 1
    emitted = update.contents[0]
    assert emitted.type == "search_tool_call"
    assert emitted.call_id == "ws_call_123"
    assert emitted.tool_name == "web_search"
    assert emitted.status == "in_progress"
    assert emitted.arguments == {"type": "search", "query": "weather in Seattle"}
def test_parse_chunk_from_openai_with_file_search_call_done() -> None:
    """An output_item.done event for a file_search_call emits one search tool result content."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
    tracked_call_ids: dict[int, tuple[str, str]] = {}

    search_item = MagicMock()
    search_item.type = "file_search_call"
    search_item.id = "fs_call_123"
    search_item.status = "completed"
    search_item.results = [{"file_id": "file_1", "text": "Seattle was cloudy."}]

    done_event = MagicMock()
    done_event.type = "response.output_item.done"
    done_event.item = search_item

    update = client._parse_chunk_from_openai(
        done_event,
        options=ChatOptions(),
        function_call_ids=tracked_call_ids,
    )

    assert len(update.contents) == 1
    emitted = update.contents[0]
    assert emitted.type == "search_tool_result"
    assert emitted.call_id == "fs_call_123"
    assert emitted.tool_name == "file_search"
    assert emitted.status == "completed"
    assert emitted.result == {"results": [{"file_id": "file_1", "text": "Seattle was cloudy."}]}
@pytest.mark.parametrize(
    "event_type",
    [
        # web_search_call phases first, then file_search_call phases.
        f"response.{tool}_search_call.{phase}"
        for tool in ("web", "file")
        for phase in ("in_progress", "searching", "completed")
    ],
)
def test_parse_chunk_from_openai_ignores_search_progress_events(event_type: str) -> None:
    """Search progress events produce an update with no contents."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
    tracked_call_ids: dict[int, tuple[str, str]] = {}

    progress_event = MagicMock()
    progress_event.type = event_type

    update = client._parse_chunk_from_openai(
        progress_event,
        options=ChatOptions(),
        function_call_ids=tracked_call_ids,
    )

    assert update.contents == []
def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None:
"""Test that response.output_item.added for mcp_call emits only the call, not the result.
@@ -2716,6 +2910,48 @@ async def test_get_response_streaming_with_response_format() -> None:
await run_streaming()
async def test_inner_get_response_streaming_with_response_format_tracks_reasoning_delta_ids() -> None:
    """On the ``responses.stream`` path, a reasoning done event after a delta adds no extra text."""
    client = OpenAIChatClient(model="test-model", api_key="test-key")
    messages = [Message(role="user", contents=["Test streaming with format"])]
    item_id = "reasoning_stream"

    # A delta followed by the matching done event for the same reasoning item.
    delta_event = ResponseReasoningTextDeltaEvent(
        type="response.reasoning_text.delta",
        content_index=0,
        item_id=item_id,
        output_index=0,
        sequence_number=1,
        delta="Hello ",
    )
    done_event = ResponseReasoningTextDoneEvent(
        type="response.reasoning_text.done",
        content_index=0,
        item_id=item_id,
        output_index=0,
        sequence_number=2,
        text="Hello ",
    )
    fake_stream = _FakeAsyncEventStreamContext([delta_event, done_event])

    with (
        patch.object(
            client,
            "_prepare_request",
            new=AsyncMock(return_value=(client.client, {"text_format": OutputStruct}, {})),
        ),
        patch.object(client.client.responses, "stream", return_value=fake_stream),
        patch.object(client, "_get_metadata_from_response", return_value={}),
    ):
        stream = client._inner_get_response(messages=messages, options={}, stream=True)
        updates = [update async for update in stream]

    reasoning_chunks = []
    for update in updates:
        for content in update.contents:
            if content.type == "text_reasoning":
                reasoning_chunks.append(content.text)

    # The delta text appears exactly once.
    assert reasoning_chunks == ["Hello "]
def test_prepare_content_for_openai_image_content() -> None:
"""Test _prepare_content_for_openai with image content variations."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -3153,6 +3389,44 @@ def test_streaming_reasoning_deltas_then_done_no_duplication() -> None:
assert "".join(c.text for c in all_contents) == "Hello world"
async def test_inner_get_response_streaming_create_tracks_reasoning_delta_ids() -> None:
    """Reasoning ``done`` events are suppressed after deltas on the ``responses.create(stream=True)`` path.

    The fake stream yields one reasoning delta followed by the matching
    ``done`` event for the same item id. Only the delta text is expected to
    surface as ``text_reasoning`` content.
    """
    client = OpenAIChatClient(model="test-model", api_key="test-key")
    item_id = "reasoning_create"

    # Fields shared by the delta and its trailing done event.
    shared_fields = dict(content_index=0, item_id=item_id, output_index=0)
    delta_event = ResponseReasoningTextDeltaEvent(
        type="response.reasoning_text.delta",
        sequence_number=1,
        delta="Hello ",
        **shared_fields,
    )
    done_event = ResponseReasoningTextDoneEvent(
        type="response.reasoning_text.done",
        sequence_number=2,
        text="Hello ",
        **shared_fields,
    )

    # Empty run options (no ``text_format``) are what this test uses to
    # exercise the ``responses.create`` call, which is awaited, hence the AsyncMock.
    prepare_request = AsyncMock(return_value=(client.client, {}, {}))
    create_call = AsyncMock(return_value=_FakeAsyncEventStream([delta_event, done_event]))

    reasoning_chunks = []
    with (
        patch.object(client, "_prepare_request", new=prepare_request),
        patch.object(client.client.responses, "create", new=create_call),
        patch.object(client, "_get_metadata_from_response", return_value={}),
    ):
        stream = client._inner_get_response(
            messages=[Message(role="user", contents=["Test streaming"])],
            options={},
            stream=True,
        )
        # The stream is lazy, so it has to be drained while the patches are active.
        async for update in stream:
            for content in update.contents:
                if content.type == "text_reasoning":
                    reasoning_chunks.append(content.text)

    assert reasoning_chunks == ["Hello "]
def test_streaming_reasoning_events_preserve_metadata() -> None:
"""Test that reasoning events preserve metadata like regular text events."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
@@ -3890,26 +4164,22 @@ async def test_integration_tool_rich_content_image() -> None:
client = OpenAIChatClient()
client.function_invocation_configuration["max_iterations"] = 2
for streaming in [False, True]:
messages = [
Message(
role="user",
contents=["Call the get_test_image tool and describe what you see."],
)
]
options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"}
messages = [
Message(
role="user",
contents=["Call the get_test_image tool and describe what you see."],
)
]
options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"}
if streaming:
response = await client.get_response(messages=messages, stream=True, options=options).get_final_response()
else:
response = await client.get_response(messages=messages, options=options)
response = await client.get_response(messages=messages, stream=True, options=options).get_final_response()
assert response is not None
assert isinstance(response, ChatResponse)
assert response.text is not None
assert len(response.text) > 0
# sample_image.jpg contains a photo of a house; the model should mention it.
assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}"
assert response is not None
assert isinstance(response, ChatResponse)
assert response.text is not None
assert len(response.text) > 0
# sample_image.jpg contains a photo of a house; the model should mention it.
assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}"
@pytest.mark.flaky
@@ -486,6 +486,7 @@ async def test_integration_client_agent_existing_session() -> None:
@pytest.mark.integration
@skip_if_azure_openai_integration_tests_disabled
@_with_azure_openai_debug()
@pytest.mark.skip(reason="Azure OpenAI is flaky when handling image content as function result. Needs investigation.")
async def test_azure_openai_chat_client_tool_rich_content_image() -> None:
image_path = Path(__file__).parent.parent / "assets" / "sample_image.jpg"
image_bytes = image_path.read_bytes()
@@ -499,21 +500,12 @@ async def test_azure_openai_chat_client_tool_rich_content_image() -> None:
client = OpenAIChatClient(credential=credential)
client.function_invocation_configuration["max_iterations"] = 2
for streaming in [False, True]:
messages = [Message(role="user", contents=["Call the get_test_image tool and describe what you see."])]
options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"}
response = await client.get_response(
messages=[Message(role="user", contents=["Call the get_test_image tool and describe what you see."])],
stream=True,
options={"tools": [get_test_image], "tool_choice": "auto"},
).get_final_response()
if streaming:
response = await client.get_response(
messages=messages,
stream=True,
options=options,
).get_final_response()
else:
response = await client.get_response(messages=messages, options=options)
assert isinstance(response, ChatResponse)
assert response.text is not None
assert "house" in response.text.lower(), (
f"Model did not describe the house image. Response: {response.text}"
)
assert isinstance(response, ChatResponse)
assert response.text is not None
assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}"
+1
View File
@@ -83,6 +83,7 @@ agent-framework-foundry-hosting = { workspace = true }
agent-framework-foundry-local = { workspace = true }
agent-framework-gemini = { workspace = true }
agent-framework-github-copilot = { workspace = true }
agent-framework-hyperlight = { workspace = true }
agent-framework-lab = { workspace = true }
agent-framework-mem0 = { workspace = true }
agent-framework-ollama = { workspace = true }
@@ -7,6 +7,7 @@ These samples demonstrate how to use context providers to enrich agent conversat
| File / Folder | Description |
|---------------|-------------|
| [`simple_context_provider.py`](simple_context_provider.py) | Implement a custom context provider by extending `ContextProvider` to extract and inject structured user information across turns. |
| [`foundry_toolbox_context_provider.py`](foundry_toolbox_context_provider.py) | Compose a Microsoft Foundry toolbox with a `ContextProvider` that fetches and caches the toolbox once, then picks a subset of its tools per-turn via `select_toolbox_tools`, using the chat client as a lightweight tool router over the latest user message. |
| [`azure_ai_foundry_memory.py`](azure_ai_foundry_memory.py) | Use `FoundryMemoryProvider` to add semantic memory — automatically retrieves, searches, and stores memories via Azure AI Foundry. |
| [`azure_ai_search/`](azure_ai_search/) | Retrieval Augmented Generation (RAG) with Azure AI Search in semantic and agentic modes. See its own [README](azure_ai_search/README.md). |
| [`mem0/`](mem0/) | Memory-powered context using the Mem0 integration (open-source and managed). See its own [README](mem0/README.md). |
@@ -19,6 +20,12 @@ These samples demonstrate how to use context providers to enrich agent conversat
- `FOUNDRY_MODEL`: Model deployment name
- Azure CLI authentication (`az login`)
**For `foundry_toolbox_context_provider.py`:**
- `FOUNDRY_PROJECT_ENDPOINT`: Your Microsoft Foundry project endpoint
- `FOUNDRY_MODEL`: Model deployment name
- A toolbox already configured in that project; set `TOOLBOX_NAME` / `TOOLBOX_VERSION` at the top of the sample
- Azure CLI authentication (`az login`)
**For `azure_ai_foundry_memory.py`:**
- `FOUNDRY_PROJECT_ENDPOINT`: Your Azure AI Foundry project endpoint
- `FOUNDRY_MODEL`: Chat/responses model deployment name
@@ -8,7 +8,7 @@ This folder contains examples demonstrating how to use the Azure AI Search conte
| File | Description |
|------|-------------|
| [`search_context_agentic.py`](search_context_agentic.py) | **Agentic mode** (recommended for most scenarios): Uses Knowledge Bases in Azure AI Search for query planning and multi-hop reasoning. Provides more accurate results through intelligent retrieval with automatic query reformulation. Slightly slower with more token consumption for query planning. [Learn more](https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/foundry-iq-boost-response-relevance-by-36-with-agentic-retrieval/4470720) |
| [`search_context_agentic.py`](search_context_agentic.py) | **Agentic mode** (recommended for most scenarios): Uses Knowledge Bases in Azure AI Search for query planning and multi-hop reasoning. Provides more accurate results through intelligent retrieval with automatic query reformulation. Slightly slower with more token consumption for query planning. [Learn more](https://learn.microsoft.com/azure/search/agentic-retrieval-overview) |
| [`search_context_semantic.py`](search_context_semantic.py) | **Semantic mode** (fast queries): Fast hybrid search combining vector and keyword search with semantic ranking. Returns raw search results as context. Best for scenarios where speed is critical and simple retrieval is sufficient. |
## Installation
@@ -265,4 +265,4 @@ async with Agent(
- [RAG with Azure AI Search](https://learn.microsoft.com/azure/search/retrieval-augmented-generation-overview)
- [Semantic Search in Azure AI Search](https://learn.microsoft.com/azure/search/semantic-search-overview)
- [Knowledge Bases in Azure AI Search](https://learn.microsoft.com/azure/search/knowledge-store-concept-intro)
- [Agentic Retrieval Blog Post](https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/foundry-iq-boost-response-relevance-by-36-with-agentic-retrieval/4470720)
- [Agentic Retrieval in Azure AI Search](https://learn.microsoft.com/azure/search/agentic-retrieval-overview)
@@ -0,0 +1,207 @@
# Copyright (c) Microsoft. All rights reserved.
import asyncio
import os
from typing import Any
from agent_framework import Agent, AgentSession, ContextProvider, Message, SessionContext
from agent_framework.foundry import (
FoundryChatClient,
get_toolbox_tool_name,
get_toolbox_tool_type,
select_toolbox_tools,
)
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from pydantic import BaseModel
# Load environment variables from .env file
load_dotenv()
"""
Foundry Toolbox + Context Provider Example
This sample composes a Foundry toolbox with a ContextProvider so the agent's
tool list is chosen dynamically per-turn. It uses the chat client itself as a lightweight "tool router": the
latest user message plus a short menu of toolbox tools is sent to the model
with a Pydantic ``response_format``, and the returned tool names drive
``select_toolbox_tools``. The toolbox is fetched once and cached on the
provider's state dict; subsequent turns reuse the cache.
Prerequisites:
- A Microsoft Foundry project
- A toolbox already configured in that project (set TOOLBOX_NAME below)
- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
- Azure CLI authentication (`az login`)
"""
# Replace with your own Foundry toolbox name and version.
TOOLBOX_NAME = "research_toolbox"
# Set to None to resolve the toolbox's current default version at fetch time.
TOOLBOX_VERSION: str | None = None
# Generic queries that exercise the router without assuming any specific tool
# types are configured. The first is introspective, the second forces a
# non-empty pick for whichever tools the toolbox actually contains, and the
# third should make the router return an empty selection (in which case the
# provider falls back to surfacing every tool in the toolbox).
QUERIES: list[str] = [
"Introduce yourself and briefly describe the tools you can use to help me.",
"Pick the tool you think is most useful and demonstrate it with a short example.",
"Say hi in one short sentence - no tools needed.",
]
def create_sample_toolbox(name: str) -> str:
    """Create (or replace) a toolbox version in the Foundry project.

    Toolboxes are normally configured in the Foundry portal or by a deployment
    script, not by the application itself. This helper exists only so the
    sample can be run end-to-end without first setting a toolbox up by hand:
    it deletes any existing toolbox under ``name``, then creates a fresh
    version containing a single MCP tool.

    Args:
        name: Name of the toolbox to delete (if present) and recreate.

    Returns:
        The version identifier of the newly created toolbox version.
    """
    from azure.ai.projects import AIProjectClient
    from azure.ai.projects.models import MCPTool, Tool
    from azure.core.exceptions import ResourceNotFoundError

    with (
        AzureCliCredential() as credential,
        AIProjectClient(credential=credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as project,
    ):
        # Start from a clean slate; a toolbox that does not exist yet is fine.
        try:
            project.beta.toolboxes.delete(name)
        except ResourceNotFoundError:
            pass
        else:
            print(f"Toolbox `{name}` deleted")

        api_specs_tool = MCPTool(
            server_label="api_specs",
            server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
            require_approval="never",
        )
        toolbox_tools: list[Tool] = [api_specs_tool]

        new_version = project.beta.toolboxes.create_version(
            name=name,
            description="Toolbox version with MCP require_approval set to 'never'.",
            tools=toolbox_tools,
        )
        print(f"Created toolbox {new_version.name}@{new_version.version} ({len(new_version.tools)} tool(s))")
        return new_version.version
class ToolSelection(BaseModel):
    """Structured-output schema returned by the per-turn tool router."""

    # Names of the toolbox tools the router selected for the current turn.
    tool_names: list[str]
# System instructions for the routing call. The menu format described here
# ('NAME - TYPE', one tool per line) is the format the router prompt is built in.
ROUTER_INSTRUCTIONS = (
    "You are a tool router. "
    "Given the user's latest message and a menu of available tools "
    "(one per line, formatted as 'NAME - TYPE'), "
    "return the NAMES of the tools that would plausibly help answer the message. "
    "Return an empty list if no tool is needed."
)
class DynamicToolboxProvider(ContextProvider):
    """Fetches a Foundry toolbox once and lets the model pick tools per-turn.

    On every run the provider reads the toolbox from ``state`` (fetching and
    storing it there when absent), asks the chat client which tools fit the
    latest user message, and registers the chosen tools on the session
    context. When there is no user message, or the router selects nothing,
    every tool in the toolbox is registered instead.
    """

    DEFAULT_SOURCE_ID = "foundry_toolbox"

    def __init__(
        self,
        source_id: str = DEFAULT_SOURCE_ID,
        *,
        client: FoundryChatClient,
        toolbox_name: str,
        toolbox_version: str | None = None,
    ) -> None:
        """Store the client and the toolbox coordinates used by ``before_run``.

        Args:
            source_id: Identifier passed to the base provider and used when
                registering tools on the context.
            client: Chat client used both to fetch the toolbox and to run the
                routing request.
            toolbox_name: Name of the toolbox to fetch.
            toolbox_version: Version passed to ``get_toolbox``; ``None`` is
                forwarded unchanged.
        """
        super().__init__(source_id)
        self._client = client
        self._toolbox_name = toolbox_name
        self._toolbox_version = toolbox_version

    async def before_run(
        self,
        *,
        agent: Any,
        session: AgentSession | None,
        context: SessionContext,
        state: dict[str, Any],
    ) -> None:
        """Cache the toolbox on first call, then let the model pick tools per-turn.

        Args:
            agent: The running agent (unused here).
            session: The current session, if any (unused here).
            context: Session context that supplies the messages and receives
                the selected tools.
            state: Mutable dict in which the fetched toolbox is cached under
                the ``"toolbox"`` key.
        """
        toolbox = state.get("toolbox")
        if toolbox is None:
            toolbox = await self._client.get_toolbox(self._toolbox_name, version=self._toolbox_version)
            state["toolbox"] = toolbox
            print(f"[{self.source_id}] Loaded toolbox {toolbox.name}@{toolbox.version} ({len(toolbox.tools)} tool(s))")

        user_messages = [m for m in context.get_messages(include_input=True) if getattr(m, "role", None) == "user"]
        if not user_messages:
            # Nothing to route on, so expose the whole toolbox.
            context.extend_tools(self.source_id, list(toolbox.tools))
            return

        picks = await self._route_tools(user_messages[-1].text, toolbox.tools)
        if picks:
            tools = select_toolbox_tools(toolbox, include_names=picks)
            print(f"[{self.source_id}] Router picked {sorted(picks)} - surfacing {len(tools)} tool(s)")
        else:
            # An empty selection is treated as "no opinion", not "no tools".
            tools = list(toolbox.tools)
            print(f"[{self.source_id}] Router picked nothing - surfacing all {len(tools)} tool(s)")
        context.extend_tools(self.source_id, tools)

    async def _route_tools(self, user_text: str, tools: Any) -> list[str]:
        """Ask the model which toolbox tools to surface for this turn.

        Args:
            user_text: Text of the latest user message.
            tools: Iterable of toolbox tools used to build the 'NAME - TYPE' menu.

        Returns:
            The tool names selected by the model, or an empty list when the
            response carries no structured value.
        """
        menu = "\n".join(f"- {get_toolbox_tool_name(t)} - {get_toolbox_tool_type(t)}" for t in tools)
        prompt = (
            f"User message:\n{user_text}\n\n"
            f"Available tools:\n{menu}\n\n"
            "Return the names of tools that should be surfaced for this turn."
        )
        response = await self._client.get_response(
            messages=[Message("user", [prompt])],
            options={
                "instructions": ROUTER_INSTRUCTIONS,
                "response_format": ToolSelection,
            },
        )
        selection: ToolSelection | None = response.value  # type: ignore
        if selection is None:
            # No structured value came back. Report "no picks" so before_run
            # takes its surface-everything fallback instead of failing with
            # AttributeError on ``None.tool_names``.
            return []
        return selection.tool_names
async def main() -> None:
    """Run every sample query through an agent whose tools are supplied by the toolbox provider."""
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=AzureCliCredential(),
    )

    # Comment out if the toolbox already exists in your Foundry project.
    create_sample_toolbox(TOOLBOX_NAME)

    # The provider reuses the agent's chat client for toolbox fetches and routing.
    provider = DynamicToolboxProvider(
        client=client,
        toolbox_name=TOOLBOX_NAME,
        toolbox_version=TOOLBOX_VERSION,
    )
    agent_instructions = (
        "You are a helpful assistant. Use the tools available to you on each "
        "turn to answer the user. If no tools are relevant, reply directly."
    )

    async with Agent(
        client=client,
        instructions=agent_instructions,
        context_providers=[provider],
    ) as agent:
        # All queries share one session.
        session = agent.create_session()
        for query in QUERIES:
            print(f"\nUser: {query}")
            answer = await agent.run(query, session=session)
            print(f"Assistant: {answer}")
if __name__ == "__main__":
asyncio.run(main())
@@ -26,6 +26,8 @@ This folder contains Azure AI Foundry and Foundry Local samples for Agent Framew
| [`foundry_chat_client_with_hosted_mcp.py`](foundry_chat_client_with_hosted_mcp.py) | Foundry Chat Client with hosted MCP |
| [`foundry_chat_client_with_local_mcp.py`](foundry_chat_client_with_local_mcp.py) | Foundry Chat Client with local MCP |
| [`foundry_chat_client_with_session.py`](foundry_chat_client_with_session.py) | Foundry Chat Client with session management |
| [`foundry_chat_client_with_toolbox.py`](foundry_chat_client_with_toolbox.py) | Foundry Chat Client with Foundry toolbox loading and multi-toolbox composition |
| [`foundry_chat_client_with_toolbox_mcp.py`](foundry_chat_client_with_toolbox_mcp.py) | Foundry Chat Client connected to a toolbox via its MCP endpoint using `MCPStreamableHTTPTool` |
## FoundryLocalClient Samples
@@ -0,0 +1,174 @@
# Copyright (c) Microsoft. All rights reserved.
import asyncio
import os
from agent_framework import Agent
from agent_framework.foundry import FoundryChatClient, select_toolbox_tools
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
"""
Foundry Chat Client with Toolbox Example
This sample demonstrates loading a named, versioned Foundry toolbox into an
Agent via ``FoundryChatClient.get_toolbox()``. A toolbox is a server-side
bundle of tool configurations (code interpreter, file search, MCP, web search,
etc.) configured in the Foundry portal or via the raw SDK.
Prerequisites:
- A Microsoft Foundry project
- A toolbox already configured in that project (set TOOLBOX_NAME below)
- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
"""
# Replace with your own Foundry toolbox name and version.
TOOLBOX_NAME = "research_toolbox"
TOOLBOX_VERSION = "1"
# Used only by combine_toolboxes() — swap in a second toolbox you own.
SECOND_TOOLBOX_NAME = "analysis_toolbox"
SECOND_TOOLBOX_VERSION = "1"
# Replace with any question that exercises the tools configured in your toolbox.
QUERY = "Introduce yourself and briefly describe the tools you can use to help me."
def create_sample_toolbox(name: str) -> str:
    """Create (or replace) a toolbox version in the Foundry project.

    Toolboxes are normally configured in the Foundry portal or by a deployment
    script, not by the application itself. This helper exists only so the
    samples can be run end-to-end without first setting a toolbox up by hand:
    it deletes any existing toolbox under ``name``, then creates a fresh
    version containing a single MCP tool.

    Args:
        name: Name of the toolbox to delete (if present) and recreate.

    Returns:
        The version identifier of the newly created toolbox version.
    """
    from azure.ai.projects import AIProjectClient
    from azure.ai.projects.models import MCPTool, Tool
    from azure.core.exceptions import ResourceNotFoundError

    with (
        AzureCliCredential() as credential,
        AIProjectClient(credential=credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as project,
    ):
        # Start from a clean slate; a toolbox that does not exist yet is fine.
        try:
            project.beta.toolboxes.delete(name)
        except ResourceNotFoundError:
            pass
        else:
            print(f"Toolbox `{name}` deleted")

        api_specs_tool = MCPTool(
            server_label="api_specs",
            server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
            require_approval="never",
        )
        toolbox_tools: list[Tool] = [api_specs_tool]

        new_version = project.beta.toolboxes.create_version(
            name=name,
            description="Toolbox version with MCP require_approval set to 'never'.",
            tools=toolbox_tools,
        )
        print(f"Created toolbox {new_version.name}@{new_version.version} ({len(new_version.tools)} tool(s))")
        return new_version.version
async def main() -> None:
    """Load one Foundry toolbox and hand it to an agent as its tool set."""
    print("=== Foundry Chat Client with Toolbox Example ===")

    # For authentication, run `az login` in your terminal or replace
    # AzureCliCredential with your preferred authentication option.
    client = FoundryChatClient(
        credential=AzureCliCredential(),
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
    )

    # Comment out if the toolbox already exists in your Foundry project.
    create_sample_toolbox(TOOLBOX_NAME)

    # Omit ``version`` to resolve the toolbox's current default version at runtime.
    toolbox = await client.get_toolbox(TOOLBOX_NAME)
    tool_count = len(toolbox.tools)
    print(f"Loaded toolbox {toolbox.name}@{toolbox.version} ({tool_count} tool(s))")

    # The toolbox object itself is passed as ``tools``.
    research_agent = Agent(
        client=client,
        instructions="You are a research assistant. Use the available tools to answer questions.",
        tools=toolbox,
    )

    print(f"User: {QUERY}")
    answer = await research_agent.run(QUERY)
    print(f"Result: {answer}\n")
async def combine_toolboxes() -> None:
    """Alternative flow: give one agent the tools from two Foundry toolboxes."""
    client = FoundryChatClient(
        credential=AzureCliCredential(),
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
    )

    # Comment out if the toolboxes already exist in your Foundry project.
    # NOTE(review): create_sample_toolbox returns the created version, which is
    # not used here - confirm the pinned *_VERSION constants match it.
    for toolbox_name in (TOOLBOX_NAME, SECOND_TOOLBOX_NAME):
        create_sample_toolbox(toolbox_name)

    first = await client.get_toolbox(TOOLBOX_NAME, version=TOOLBOX_VERSION)
    second = await client.get_toolbox(SECOND_TOOLBOX_NAME, version=SECOND_TOOLBOX_VERSION)
    summaries = [f"{box.name}@{box.version} ({len(box.tools)} tool(s))" for box in (first, second)]
    print("Loaded toolboxes: " + ", ".join(summaries))

    # Both toolboxes are passed together as a list.
    research_agent = Agent(
        client=client,
        instructions="You are a research assistant. Use all available tools to answer questions.",
        tools=[first, second],
    )

    print(f"User: {QUERY}")
    answer = await research_agent.run(QUERY)
    print(f"Combined-toolbox result: {answer}\n")
async def select_tools_from_toolbox() -> None:
    """Alternative flow: filter the toolbox down to chosen tool types before building the agent."""
    client = FoundryChatClient(
        credential=AzureCliCredential(),
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
    )

    # Comment out if the toolbox already exists in your Foundry project.
    # NOTE(review): create_sample_toolbox returns the created version, which is
    # not used here - confirm TOOLBOX_VERSION matches it.
    create_sample_toolbox(TOOLBOX_NAME)

    toolbox = await client.get_toolbox(TOOLBOX_NAME, version=TOOLBOX_VERSION)
    tool_count = len(toolbox.tools)
    print(f"Loaded toolbox {toolbox.name}@{toolbox.version} ({tool_count} tool(s))")

    # Keep only tools of the listed types.
    wanted_types = ["code_interpreter", "mcp"]
    selected_tools = select_toolbox_tools(toolbox, include_types=wanted_types)
    print(f"Selected {len(selected_tools)} toolbox tools for the agent")

    research_agent = Agent(
        client=client,
        instructions="You are a research assistant. Use only the selected toolbox tools.",
        tools=selected_tools,
    )

    print(f"User: {QUERY}")
    answer = await research_agent.run(QUERY)
    print(f"Selected-toolbox result: {answer}\n")
if __name__ == "__main__":
asyncio.run(main())
# asyncio.run(combine_toolboxes())
# asyncio.run(select_tools_from_toolbox())
@@ -0,0 +1,118 @@
# Copyright (c) Microsoft. All rights reserved.
import asyncio
import os
from collections.abc import Callable
from typing import Any
from agent_framework import Agent, MCPStreamableHTTPTool
from agent_framework.foundry import FoundryChatClient
from azure.core.credentials import TokenCredential
from azure.identity import AzureCliCredential, DefaultAzureCredential, get_bearer_token_provider
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
"""
Foundry Toolbox via MAF ``MCPStreamableHTTPTool``
Instead of fetching the toolbox and fanning out individual tool specs, point
MAF's ``MCPStreamableHTTPTool`` at the toolbox's MCP endpoint. The agent
discovers and calls the toolbox's tools over MCP at runtime.
Prerequisites:
- A Microsoft Foundry project with a toolbox configured
- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
- FOUNDRY_TOOLBOX_ENDPOINT: the toolbox's MCP endpoint URL, e.g.
``https://<account>.services.ai.azure.com/api/projects/<project>/toolsets/<name>/mcp?api-version=v1``
- Azure CLI authentication (``az login``)
"""
# Must match the ``<name>`` segment of FOUNDRY_TOOLBOX_ENDPOINT.
TOOLBOX_NAME = "research_toolbox"
def create_sample_toolbox(name: str) -> str:
    """Create (or replace) a toolbox version in the Foundry project.

    Toolboxes are normally configured in the Foundry portal or by a deployment
    script, not by the application itself. This helper exists only so the
    sample can be run end-to-end without first setting a toolbox up by hand:
    it deletes any existing toolbox under ``name``, then creates a fresh
    version containing a single MCP tool.

    Args:
        name: Name of the toolbox to delete (if present) and recreate.

    Returns:
        The version identifier of the newly created toolbox version.
    """
    from azure.ai.projects import AIProjectClient
    from azure.ai.projects.models import MCPTool, Tool
    from azure.core.exceptions import ResourceNotFoundError

    with (
        AzureCliCredential() as credential,
        AIProjectClient(credential=credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as project,
    ):
        # Start from a clean slate; a toolbox that does not exist yet is fine.
        try:
            project.beta.toolboxes.delete(name)
        except ResourceNotFoundError:
            pass
        else:
            print(f"Toolbox `{name}` deleted")

        api_specs_tool = MCPTool(
            server_label="api_specs",
            server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
            require_approval="never",
        )
        toolbox_tools: list[Tool] = [api_specs_tool]

        new_version = project.beta.toolboxes.create_version(
            name=name,
            description="Toolbox version with MCP require_approval set to 'never'.",
            tools=toolbox_tools,
        )
        print(f"Created toolbox {new_version.name}@{new_version.version} ({len(new_version.tools)} tool(s))")
        return new_version.version
def make_toolbox_header_provider(credential: TokenCredential) -> Callable[[dict[str, Any]], dict[str, str]]:
    """Return a ``header_provider`` callable that supplies an ``Authorization`` bearer header.

    The token comes from ``get_bearer_token_provider`` for the
    ``https://ai.azure.com/.default`` scope and is requested each time the
    returned callable is invoked.

    Args:
        credential: Azure credential used to obtain the bearer token.

    Returns:
        A callable that takes the request kwargs and returns the headers to add.
    """
    token_provider = get_bearer_token_provider(credential, "https://ai.azure.com/.default")

    def provide(_kwargs: dict[str, Any]) -> dict[str, str]:
        # The incoming kwargs are ignored; only the auth header is produced.
        bearer_token = token_provider()
        return {"Authorization": f"Bearer {bearer_token}"}

    return provide
async def main() -> None:
    """Connect an agent to the toolbox's MCP endpoint and ask which tools it can use."""
    credential = DefaultAzureCredential()

    # Comment out if the toolbox already exists in your Foundry project.
    create_sample_toolbox(TOOLBOX_NAME)

    # The same credential is used for the MCP endpoint and for the chat client.
    toolbox_tool = MCPStreamableHTTPTool(
        name="foundry_toolbox",
        description="Tools exposed by the configured Foundry toolbox",
        url=os.environ["FOUNDRY_TOOLBOX_ENDPOINT"],
        header_provider=make_toolbox_header_provider(credential),
        load_prompts=False,
    )
    chat_client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=credential,
    )

    async with Agent(
        client=chat_client,
        instructions="You are a helpful assistant. Use the available toolbox tools to answer the user.",
        tools=toolbox_tool,
    ) as agent:
        query = "What tools do you have access to?"
        print(f"User: {query}")
        answer = await agent.run(query)
        print(f"Assistant: {answer}")
if __name__ == "__main__":
asyncio.run(main())
+57 -10
View File
@@ -46,6 +46,7 @@ members = [
"agent-framework-foundry-local",
"agent-framework-gemini",
"agent-framework-github-copilot",
"agent-framework-hyperlight",
"agent-framework-lab",
"agent-framework-mem0",
"agent-framework-ollama",
@@ -496,7 +497,7 @@ requires-dist = [
{ name = "agent-framework-core", editable = "packages/core" },
{ name = "agent-framework-openai", editable = "packages/openai" },
{ name = "azure-ai-inference", specifier = ">=1.0.0b9,<1.0.0b10" },
{ name = "azure-ai-projects", specifier = ">=2.0.0,<3.0" },
{ name = "azure-ai-projects", specifier = ">=2.1.0,<3.0" },
]
[[package]]
@@ -565,6 +566,25 @@ requires-dist = [
{ name = "github-copilot-sdk", marker = "python_full_version >= '3.11'", specifier = ">=0.2.1,<=0.2.1" },
]
[[package]]
name = "agent-framework-hyperlight"
version = "1.0.0a260409"
source = { editable = "packages/hyperlight" }
dependencies = [
{ name = "agent-framework-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "hyperlight-sandbox", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')" },
{ name = "hyperlight-sandbox-python-guest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
]
[package.metadata]
requires-dist = [
{ name = "agent-framework-core", editable = "packages/core" },
{ name = "hyperlight-sandbox", specifier = ">=0.3.0,<0.4" },
{ name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')", specifier = ">=0.3.0,<0.4" },
{ name = "hyperlight-sandbox-python-guest", specifier = ">=0.3.0,<0.4" },
]
[[package]]
name = "agent-framework-lab"
version = "1.0.0b260409"
@@ -1092,7 +1112,7 @@ wheels = [
[[package]]
name = "azure-ai-projects"
version = "2.0.1"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
@@ -1102,9 +1122,9 @@ dependencies = [
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/86/f9/a15c8a16e35e6d620faebabc6cc4f9e2f4b7f1d962cc6f58931c46947e24/azure_ai_projects-2.0.1.tar.gz", hash = "sha256:c8c64870aa6b89903af69a4ff28b4eff3df9744f14615ea572cae87394946a0c", size = 491774, upload-time = "2026-03-12T19:59:02.712Z" }
sdist = { url = "https://files.pythonhosted.org/packages/72/76/3fdede8eddfe5927a571898a15f0288ba30fee78e5ba099f88df3ded70af/azure_ai_projects-2.1.0.tar.gz", hash = "sha256:f0749fa9a174255aa1a5550fb6078208521518472907a4c6dd552767d9b39caa", size = 543343, upload-time = "2026-04-20T17:06:48.751Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8d/f7/290ca39501c06c6e23b46ba9f7f3dfb05ecc928cde105fed85d6845060dd/azure_ai_projects-2.0.1-py3-none-any.whl", hash = "sha256:dfda540d256e67a52bf81c75418b6bf92b811b96693fe45787e154a888ad2396", size = 236560, upload-time = "2026-03-12T19:59:04.249Z" },
{ url = "https://files.pythonhosted.org/packages/f7/f6/4984e7772a97c7a9e6505a3de8e55a5070fa2b02cd7e980da91e0d9b9b97/azure_ai_projects-2.1.0-py3-none-any.whl", hash = "sha256:6f259d8eb9167d2dfd372006d0221a8118faeaeb05829fa898b595bc6f19c699", size = 274309, upload-time = "2026-04-20T17:06:50.542Z" },
]
[[package]]
@@ -2487,7 +2507,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" },
{ url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" },
{ url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" },
{ url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" },
{ url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" },
{ url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" },
{ url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" },
@@ -2495,7 +2514,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" },
{ url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" },
{ url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" },
{ url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" },
{ url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" },
{ url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" },
{ url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" },
@@ -2504,7 +2522,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" },
{ url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" },
{ url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" },
{ url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" },
{ url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" },
{ url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" },
{ url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" },
@@ -2513,7 +2530,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" },
{ url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" },
{ url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" },
{ url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" },
{ url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" },
{ url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" },
{ url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" },
@@ -2522,7 +2538,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" },
{ url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" },
{ url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" },
{ url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" },
{ url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" },
{ url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" },
{ url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" },
@@ -2531,7 +2546,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" },
{ url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" },
{ url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" },
{ url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" },
{ url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" },
{ url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" },
{ url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" },
@@ -2825,6 +2839,39 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
]
[[package]]
name = "hyperlight-sandbox"
version = "0.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/cf/fe/ce88996ea3e3e05130d6f0e8cd2ffbe9ab9bf3d9448b7050d4b8d0802b0a/hyperlight_sandbox-0.3.0.tar.gz", hash = "sha256:00491ce267ffbdb206377c79b4afd86510177ad73f4daf2ef7fce02b54eaf801", size = 9251, upload-time = "2026-04-07T03:49:52.542Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2b/33/e6dcd6729308d13570ae2d3be0e476019a6f3fea387d7549bb1f77ce0408/hyperlight_sandbox-0.3.0-py3-none-any.whl", hash = "sha256:ba8e6779d64e9c187acd93456851ebafaed2f49380e5d132bc0906a4080d2217", size = 5723, upload-time = "2026-04-07T03:49:53.276Z" },
]
[[package]]
name = "hyperlight-sandbox-backend-wasm"
version = "0.3.0"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/58/91/c9d68cad7996fdd2f1facef1453156bdd8d52eefa976cc8c827c13029497/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:eda362f5f737b0823326290d7627c76ce0547a78e70f07f8c9d177e34622fc02", size = 3806454, upload-time = "2026-04-07T03:49:24.238Z" },
{ url = "https://files.pythonhosted.org/packages/9a/6f/6b2399a1caf59dd19b635d99ee1add0c975af7bc3317f5d0f1f9c3f90aa0/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:79347b7ae94f2786691b04cb52130dabc5991e0c03b42a24bad8adc766832655", size = 3283951, upload-time = "2026-04-07T03:49:17.137Z" },
{ url = "https://files.pythonhosted.org/packages/23/f2/b380c34a0ce8d486a05adb66757f98cca029e1fb1c96b1c29be0d25d3882/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:aff9eec4803fb535a140298e2632529f4150fcf3c6ea3ff2ae4571572a836116", size = 3806601, upload-time = "2026-04-07T03:49:22.853Z" },
{ url = "https://files.pythonhosted.org/packages/b4/5a/fb78cfd934e0523887b8d5b073b7b2aed3b545add21cda3aa95929ac1659/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:b6151704dd19862c9869b115752b4504b45d0b2eeb46aa9385a1a3b8be11cfa8", size = 3284164, upload-time = "2026-04-07T03:49:18.556Z" },
{ url = "https://files.pythonhosted.org/packages/21/bc/4e21f5c7ccd9307ac63a61c71b62a57ee4a9e6eec77fc72ff072907a21f5/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:cfd1d22ce221774d82a5174d268d56ff70fc1a23fb993a6491358b5d0ed169bf", size = 3802901, upload-time = "2026-04-07T03:49:19.845Z" },
{ url = "https://files.pythonhosted.org/packages/9a/41/646be9b0c7bb0f9192e45a77414673aa414eb316c92b5312efe6fb4ce802/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:229ab494a422f2de895a2a27ad6a6a2daed710ea062d7c213878bbe5f5b32fa7", size = 3281220, upload-time = "2026-04-07T03:49:21.368Z" },
{ url = "https://files.pythonhosted.org/packages/74/3a/f8ec4a41fffba4036dfc3cbddc3dfb6e87466b01afe1cb0a50cc6a0f0eed/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b91905ee2ddd36a78b0dd13b1a62be99a995a45121587c111692591e40b36912", size = 3802789, upload-time = "2026-04-07T03:49:15.614Z" },
{ url = "https://files.pythonhosted.org/packages/3c/62/dfa8c15102f9b8ec5c3b5ffb54b99d60c75e7a6e4d00540757656bc5a5d8/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:eff682761c3b86abfe7e0d523ea0e6d5c7e8299302917c53918743b82c9d1ea2", size = 3280501, upload-time = "2026-04-07T03:49:13.939Z" },
]
[[package]]
name = "hyperlight-sandbox-python-guest"
version = "0.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/23/6a/f182c4315d31a98dd3b82f9274638e3adb399779584af93c5087bb2f814f/hyperlight_sandbox_python_guest-0.3.0.tar.gz", hash = "sha256:b1de5d8e87375dc6bef744ecd7ae2a7f43d5f6b913b4e990e9872bd439c0b19e", size = 21554625, upload-time = "2026-04-07T03:49:42.672Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c9/8e/4cd754928464f56528645c7421ccbb3fcbe45ad2542f899712b0f2f2c0e1/hyperlight_sandbox_python_guest-0.3.0-py3-none-any.whl", hash = "sha256:3c55a7420666ad9a208893dbdf7ad1b5c8ad4f3a94b1a56e64979719c7ce95c1", size = 21716481, upload-time = "2026-04-07T03:49:39.885Z" },
]
[[package]]
name = "idna"
version = "3.11"