diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index 3c6c620614..f2fb5c6448 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -131,7 +131,7 @@ jobs: --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 - # Misc integration tests (Anthropic, Ollama, MCP) + # Misc integration tests (Anthropic, Hyperlight, Ollama, MCP) python-tests-misc-integration: name: Python Integration Tests - Misc runs-on: ubuntu-latest @@ -162,10 +162,11 @@ jobs: fallback_url: ${{ env.LOCAL_MCP_URL }} - name: Prefer local MCP URL when available run: echo "LOCAL_MCP_URL=${{ steps.local-mcp.outputs.effective_url }}" >> "$GITHUB_ENV" - - name: Test with pytest (Anthropic, Ollama, MCP integration) + - name: Test with pytest (Anthropic, Hyperlight, Ollama, MCP integration) run: > uv run pytest --import-mode=importlib packages/anthropic/tests + packages/hyperlight/tests packages/ollama/tests packages/core/tests/core/test_mcp.py -m integration diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml index 454b297bed..dd48b268df 100644 --- a/.github/workflows/python-merge-tests.yml +++ b/.github/workflows/python-merge-tests.yml @@ -65,6 +65,7 @@ jobs: - 'python/samples/**/providers/azure/**' misc: - 'python/packages/anthropic/**' + - 'python/packages/hyperlight/**' - 'python/packages/ollama/**' - 'python/packages/core/agent_framework/_mcp.py' - 'python/packages/core/tests/core/test_mcp.py' @@ -278,10 +279,11 @@ jobs: fallback_url: ${{ env.LOCAL_MCP_URL }} - name: Prefer local MCP URL when available run: echo "LOCAL_MCP_URL=${{ steps.local-mcp.outputs.effective_url }}" >> "$GITHUB_ENV" - - name: Test with pytest (Anthropic, Ollama, MCP integration) + - name: Test with pytest (Anthropic, Hyperlight, Ollama, MCP integration) run: > uv run pytest --import-mode=importlib packages/anthropic/tests + 
packages/hyperlight/tests packages/ollama/tests packages/core/tests/core/test_mcp.py -m integration diff --git a/.gitignore b/.gitignore index 4994e9e2fe..c846efea7b 100644 --- a/.gitignore +++ b/.gitignore @@ -203,6 +203,8 @@ temp*/ # AI .claude/ +.omc/ +.omx/ WARP.md **/memory-bank/ **/projectBrief.md @@ -235,3 +237,4 @@ python/dotnet-ref # Generated filtered solution files (created by eng/scripts/New-FilteredSolution.ps1) dotnet/filtered-*.slnx +**/*.lscache diff --git a/docs/decisions/0024-codeact-integration.md b/docs/decisions/0024-codeact-integration.md new file mode 100644 index 0000000000..b83af6a17e --- /dev/null +++ b/docs/decisions/0024-codeact-integration.md @@ -0,0 +1,233 @@ +--- +status: proposed +contact: eavanvalkenburg +date: 2026-04-07 +deciders: TBD +consulted: +informed: +--- + +# CodeAct integration through backend-specific context providers and an `execute_code` tool + +## Introduction + +**CodeAct** is a pattern in which the model writes executable code — rather than emitting a fixed function-call JSON schema — to plan, transform data, and orchestrate tool calls inside a single sandbox invocation. Instead of requiring a separate model round-trip for every tool call, conditional branch, or data transformation, the model produces a short program that runs in a controlled runtime, calls host-provided tools through a `call_tool(...)` bridge, and returns structured results. This reduces latency, lowers token cost, and lets the model express richer multi-step logic that is difficult to capture in a flat tool-call sequence. + +Throughout this ADR, **CodeAct** is the primary term. **Code mode** and **programmatic tool calling** refer to the same capability. + +## Context and Problem Statement + +We need an architecture design that supports CodeAct in both Python and .NET. 
This is a necessary capability for the current generation of long-running agents, which need to plan, iterate, transform tool outputs, and execute bounded code inside a controlled runtime — for example, filtering a large result set, computing derived values, or chaining several tool calls with conditional logic — instead of requiring a separate model round-trip for each of those steps. The design should preserve the same behavioral contract across SDKs, but it does not need to use the same internal extension point in each runtime. We also want to standardize on Hyperlight as the initial backend, using the existing Python package and an anticipated .NET binding package once it is available. + +As noted in the Introduction, this ADR uses **CodeAct** consistently as the primary term; **code mode** and **programmatic tool calling** refer to the same capability. + +Model-generated code is treated as untrusted relative to the host process. This ADR assumes the selected backend provides the primary isolation boundary, while the framework is responsible for configuring approvals and capabilities, integrating telemetry, and translating outputs and failures into framework-native shapes. If a backend cannot provide isolation appropriate for its trust model, it is not a suitable CodeAct backend. + +The core design question is: **where should CodeAct integrate into the agent pipeline so that both SDKs can offer the same functionality without invasive changes to their core function-calling loops?** + +## Decision Drivers + +- CodeAct must shape the model-facing surface before model invocation, not only after the model has already chosen tools. +- The design should let users control which tools are available through CodeAct and which remain regular tools only. +- The design must preserve existing session, approval, telemetry, and tool invocation behavior as much as possible. 
+- The design should define the minimum cross-SDK telemetry and failure semantics for `execute_code`, so Python and .NET do not diverge on basic observability or error handling. +- The design must fit naturally into the extension points that already exist in each SDK. +- The design must be safe for concurrent runs and must not rely on mutating shared agent configuration during invocation. +- The chosen structure should allow multiple backend-specific providers to fit under the same conceptual design over time, even though Hyperlight is the initial target. +- The abstraction should not assume that every backend is a VM-style sandbox; alternative execution models such as Pydantic's Monty should also fit. +- The design should allow `execute_code` to be reused both as a tool-enabled CodeAct runtime and as a standard code interpreter tool implementation. +- The design should remain open to alternative language/runtime modes, such as JavaScript on Hyperlight, rather than baking the abstraction to Python only. +- The design should provide a portable way to configure sandbox capabilities such as file access and network access, including allow-listed outbound domains. +- Using CodeAct should be optional, and installing its runtime or backend dependencies should also be optional. +- Backend-specific dependencies should be isolated behind a small adapter so SDK code is not tightly coupled to an unstable package surface. 
+ +## Considered Options + +- **Option 1**: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types +- **Option 2**: Implement CodeAct as a dedicated chat-client decorator/wrapper +- **Option 3**: Integrate CodeAct directly into the function invocation layer/FunctionInvokingChatClient + +## Pros and Cons of the Options + +### Option 1: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types + +This option uses `ContextProvider` in Python and `AIContextProvider` in .NET, but standardizes the public concept and behavior. +In this option, the CodeAct tool set is provider-owned: only tools explicitly configured on the concrete CodeAct provider instance are available inside CodeAct, and the provider exposes direct CRUD-style management for tools, file mounts, and outbound network allow-list configuration rather than requiring a separate runtime setup object. +The agent's direct tool surface remains separate. If a tool should be available both through CodeAct and as a normal direct tool, it is configured in both places. + +- Good, because both SDKs already have first-class provider concepts intended for per-invocation context shaping. +- Good, because providers operate before model invocation, which is where CodeAct must add instructions and reshape tools. +- Good, because this lets us preserve existing function invocation behavior rather than rewriting it. +- Good, because slightly different internals are acceptable while the public behavior remains aligned. +- Good, because convenience builder/decorator helpers can still be added later on top of the provider model without changing the core design. +- Good, because backend-specific runtime logic can stay inside concrete provider implementations or internal helpers instead of being forced into a lowest-common-denominator public abstraction. 
+- Good, because the same provider structure can support either an all-or-nothing tool surface or a mixed side-by-side tool surface. +- Good, because users can keep some tools direct-only while allowing other tools to be used from inside CodeAct. +- Good, because a provider-owned CodeAct tool registry avoids mutating or inferring the agent's direct tool surface and can work consistently in both SDKs. +- Good, because the same conceptual design can remain open to `HyperlightCodeActProvider`, a future `MontyCodeActProvider`, and other backend-specific providers over time. +- Good, because `execute_code` can evolve into multiple backend-specific runtime modes rather than being hard-wired to one Python-plus-tools mode. +- Bad, because the provider indirection adds per-run overhead — snapshotting the tool registry, dispatching lifecycle hooks, and building instructions — that a deeper integration point could skip. In practice this overhead is negligible relative to model inference latency and sandbox startup cost. + +### Option 2: Implement CodeAct as a dedicated chat-client decorator/wrapper + +This option would introduce a CodeAct-specific chat-client decorator that injects instructions and tools directly into the chat request pipeline. + +- Good, because this is a natural fit for .NET's `DelegatingChatClient` pipeline. +- Good, because it can also support advanced custom chat-client stacks. +- Good, because backend-specific runtime selection could be hidden inside the decorator implementation. +- Good, because the decorator could also encapsulate mode-specific instruction shaping for tool-enabled versus standalone interpreter behavior. +- Good, because the decorator can decide per request whether the tool surface is exclusive or mixed. 
+- Bad, because Python can support this by building a custom layering stack on top of a `Raw...Client` and swapping in a different `FunctionInvocationLayer`, but that composition path is more manual than the .NET `DelegatingChatClient` pipeline. +- Bad, because it duplicates responsibilities already handled by provider abstractions. +- Bad, because it makes CodeAct look more transport-specific than it really is. +- Bad, because swappable backends and reusable interpreter or language modes become coupled to chat-client composition rather than modeled as first-class CodeAct concepts. + +### Option 3: Integrate CodeAct directly into the function invocation layer/FunctionInvokingChatClient + +This option would push CodeAct into Python's `FunctionInvocationLayer` and .NET's `FunctionInvokingChatClient` or related middleware. + +- Good, because it is close to tool execution and can observe concrete tool invocation behavior. +- Good, because function middleware may still be useful later for auxiliary auditing or policy around sandbox-originated tool calls. +- Bad, because this is the wrong layer for constructing the model-facing tool surface and prompt instructions. +- Bad, because it does not naturally control whether the model sees an exclusive CodeAct tool surface or a mixed side-by-side tool surface. +- Bad, because it would still require a second mechanism for hiding normal tools and advertising `execute_code`. +- Bad, because it is a weak fit for standalone interpreter modes where no tool-calling loop is needed. +- Bad, because backend selection and CodeAct mode behavior are orthogonal concerns that do not belong in the function invocation layer. +- Bad, because `.NET` would become more tightly coupled to `FunctionInvokingChatClient`, which sits below the agent framework abstraction and is not the natural cross-SDK design seam. 
+ +## Approval Model Options + +- **Option A**: Bundled approval for the `execute_code` invocation +- **Option B**: Pre-execution inspection of `call_tool(...)` references before approving `execute_code` +- **Option C**: Nested per-tool approvals during `execute_code` + +## Pros and Cons of the Approval Options + +### Option A: Bundled approval for the `execute_code` invocation + +This option grants approval once, before `execute_code` starts. Provider-owned tool calls made from inside that execution run under the same approval. The effective approval of `execute_code` is determined up front from the provider configuration rather than from inspecting which tools are actually called during execution. + +- Good, because it is the simplest model to explain and implement consistently in both SDKs. +- Good, because it fits naturally with long-running CodeAct loops where repeated approval interruptions would be disruptive. +- Good, because it does not require static code analysis before execution begins. +- Good, because it keeps the first release focused on the provider integration rather than a more complex approval engine. +- Bad, because approval is coarse-grained and may cover more activity than the user expected. +- Bad, because it provides less visibility into which provider-owned tools or capabilities will be exercised during the run. + +### Option B: Pre-execution inspection of `call_tool(...)` references before approving `execute_code` + +This option inspects submitted code for statically discoverable `call_tool("tool_name", ...)` references before execution starts and uses that information to shape the approval request. + +- Good, because it can show users more detail up front while still keeping approval at a single pre-execution moment. +- Good, because it matches the common case where tool names are spelled out directly in the generated code. +- Good, because it can coexist with bundled approval as a more informative variant of the same UX. 
+- Bad, because the analysis is inherently best-effort and cannot reliably predict dynamic behavior. +- Bad, because it requires duplicated parsing or inspection logic that does not replace runtime enforcement. + +### Option C: Nested per-tool approvals during `execute_code` + +This option requests approval when sandboxed code actually attempts to invoke a provider-owned tool that requires approval. + +- Good, because it aligns approval with real behavior rather than predicted behavior. +- Good, because it gives precise visibility into which provider-owned tools are being used. +- Good, because it can allow some tool calls while rejecting others within the same execution. +- Bad, because it interrupts long-running CodeAct flows and can degrade the user experience significantly. +- Bad, because it requires more complex runtime plumbing and approval UX in both SDKs. +- Bad, because repeated approval pauses may make CodeAct less useful for the exact long-running scenarios that motivate this feature. + +## Decision Outcomes + +### Decision 1: Integration seam and public structure + +Chosen option: **Option 1: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types**, because it is the only option that maps cleanly to both SDKs, lets us reshape instructions and tools before model invocation, and avoids invasive changes to the existing function invocation loops while still allowing multiple backend-specific providers and multiple runtime modes to fit under the same structure later. + +### Decision 2: Initial approval model + +Chosen option: **Option A: Bundled approval for the `execute_code` invocation**, because it is the smallest approval model that fits both SDKs, works well for long-running CodeAct flows, and does not force us to standardize a more complex inspection or policy engine in the first release. 
+ +This follows the spirit of the current Python tool approval flow, where `FunctionTool` uses `approval_mode="always_require" | "never_require"` and the auto-invocation loop escalates the whole batch when any called tool requires approval. + +### Design summary + +We standardize the **public concept** of CodeAct across SDKs while allowing each SDK to use the extension point that fits it best. + +- Python uses a `ContextProvider`. +- .NET uses an `AIContextProvider`. +- The term **CodeAct context provider** is used throughout this ADR as a design concept, not as a required public base type. Public SDK APIs should prefer concrete backend-specific types such as `HyperlightCodeActProvider` rather than a public abstract `CodeActContextProvider` or a public `CodeActExecutor` parameter. +- CodeAct support should ship as an optional package in each SDK rather than as part of the core package, so users who do not need CodeAct do not take on its installation and dependency footprint. That optional package may still depend on a few small, backward-compatible hooks in the host SDK's core agent pipeline. +- There is no separate runtime setup object in the chosen design. Concrete providers manage their provider-owned CodeAct tool registry, file mounts, and outbound network allow-list configuration directly through CRUD-style methods on the provider itself. +- At a high level, CodeAct is exposed through backend-specific context providers that contribute an `execute_code` tool, own the CodeAct-specific tool registry, and carry backend capability configuration such as filesystem and network access. +- The initial approval model is bundled approval for `execute_code`, using the same `approval_mode="always_require" | "never_require"` vocabulary as regular tools. +- The CodeAct provider exposes a default `approval_mode` for `execute_code`. If the provider default is `always_require`, `execute_code` is always treated as `always_require` regardless of the provider-owned tool registry. 
If the provider default is `never_require`, the effective approval for `execute_code` is derived from the provider-owned CodeAct tool registry captured for the run. +- If every provider-owned CodeAct tool in that registry has `approval_mode="never_require"`, `execute_code` is treated as `never_require`. If any provider-owned CodeAct tool in that registry has `approval_mode="always_require"`, `execute_code` is treated as `always_require`, even if the generated code may not end up calling that tool. +- Approval is granted before `execute_code` starts, and provider-owned tool calls made from inside that execution run under the same approval. +- Direct-only agent tools do not affect the approval of `execute_code`; only the provider-owned CodeAct tool registry participates in that calculation. +- This approval model is intentionally conservative. If one sensitive provider-owned tool forces `execute_code` to require approval more often than desired, the mitigation is to keep that tool direct-only or split it into a different provider/tool surface rather than trying to infer per-run tool usage up front. +- Configuring filesystem and network capability state on the provider, including adding file mounts or outbound network allow-list entries, is itself the approval for those capabilities in the initial model. +- Each `execute_code` invocation must start from a clean execution state; in-memory variables and other ephemeral interpreter/runtime state must not persist across separate calls. When a provider exposes a workspace, mounted files, or a writable artifact/output area, those files are the supported persistence mechanism across calls and are treated as external state rather than interpreter state. +- Mutating the provider's tool registry or capability configuration while a run is in flight is allowed, but it only affects subsequent runs. 
Provider implementations must snapshot the effective state for each run and synchronize concurrent access so shared provider instances remain safe across concurrent runs. +- The minimum cross-SDK telemetry contract is that `execute_code` is traced as a normal tool invocation nested inside the surrounding agent run, and provider-owned tool calls made from inside CodeAct continue to emit ordinary tool-invocation telemetry. Backend-specific resource metrics are optional extensions, not a required new top-level cross-SDK event model. +- Timeout, out-of-memory, backend crash, and similar sandbox failures are all execution failures of `execute_code` and should surface as structured error results rather than backend-specific public DTOs. Partial textual or file outputs may be returned only when the backend can report them unambiguously; callers must not rely on partial-output recovery as a portable guarantee. +- The provider-based structure preserves room for future pre-execution inspection and nested per-tool approvals if later experience shows they are needed. +- Concrete backend-specific providers may still use small SDK-local helpers or adapters internally, but that split is an implementation detail rather than a public API requirement. + +Detailed language-specific implementation notes are specified in: + +- [Python implementation](../features/code_act/python-implementation.md) +- [.NET implementation](../features/code_act/dotnet-implementation.md) + +### Minimal core hooks required by the optional package + +CodeAct remains optional at the package level, but the optional package depends on a small number of hooks that must live in the host SDK because the agent pipeline owns model invocation and per-run tool resolution. 
+ +- Python depends on the existing `ContextProvider` lifecycle, `SessionContext.extend_instructions(...)`, `SessionContext.extend_tools(...)`, per-run runtime tool access via `SessionContext.options["tools"]`, and the shared `ApprovalMode` vocabulary used by `FunctionTool`. +- .NET depends on the existing `AIContextProvider` seam, agent/runtime support for applying providers before model invocation, and the existing chat-client or function-invocation seams that concrete implementations use to contribute `execute_code`. + +These hooks are backward-compatible because they only expose or forward per-run state that core already owns. Behavior changes only when a concrete CodeAct provider opts in and uses them. + +### Concrete provider implementation contract + +The design does not require a public abstract `CodeActContextProvider` base class, but it does require a stable implementation contract for concrete providers. + +- Concrete providers should expose a standard capability surface at construction time, with SDK-appropriate naming for: + - approval mode + - workspace root + - file mounts + - allowed outbound targets plus any per-target method or policy restrictions needed by the backend +- Separate public `filesystem_mode` / `network_mode` flags are not required by the cross-SDK contract. Filesystem access may be disabled implicitly until a workspace or file mounts are configured, and outbound network may be disabled implicitly until an allow-list or equivalent outbound policy entry is configured. +- Concrete providers should expose direct CRUD-style methods for managing the provider-owned CodeAct tool registry, file mounts, and outbound network allow-list configuration, rather than requiring callers to construct a separate runtime setup object. 
+- Concrete providers should implement their host SDK's provider lifecycle hooks to: + - build CodeAct instructions, + - add `execute_code`, + - snapshot the effective CodeAct tool registry and capability settings for the run, + - compute the effective approval requirement for `execute_code`, + - configure file access and network access for the backend, + - prepare or restore execution state, + - execute code, + - and translate backend output into framework-native content. +- Any internal abstract/helper surface shared by multiple concrete providers should standardize responsibilities for: + - instruction construction, + - file-access configuration, + - network-access configuration, + - environment preparation/restoration, + - code execution, + - and output-to-content conversion. +- Backend execution output should reuse existing framework-native content/message primitives rather than introducing backend-specific public result DTOs. + +## More Information + +### Related artifacts + +- Python implementation: [`docs/features/code_act/python-implementation.md`](../features/code_act/python-implementation.md) +- .NET implementation: [`docs/features/code_act/dotnet-implementation.md`](../features/code_act/dotnet-implementation.md) +- Python provider/session APIs: [`python/packages/core/agent_framework/_sessions.py`](../../python/packages/core/agent_framework/_sessions.py) +- Python function invocation loop: [`python/packages/core/agent_framework/_tools.py`](../../python/packages/core/agent_framework/_tools.py) +- .NET context provider abstraction: [`dotnet/src/Microsoft.Agents.AI.Abstractions/AIContextProvider.cs`](../../dotnet/src/Microsoft.Agents.AI.Abstractions/AIContextProvider.cs) +- .NET agent integration for context providers: [`dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs`](../../dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs) +- Optional .NET chat-client provider decorator: 
[`dotnet/src/Microsoft.Agents.AI/AIContextProviderDecorators/AIContextProviderChatClient.cs`](../../dotnet/src/Microsoft.Agents.AI/AIContextProviderDecorators/AIContextProviderChatClient.cs) +- .NET function invocation middleware seam: [`dotnet/src/Microsoft.Agents.AI/FunctionInvocationDelegatingAgentBuilderExtensions.cs`](../../dotnet/src/Microsoft.Agents.AI/FunctionInvocationDelegatingAgentBuilderExtensions.cs) + +### Related decisions + +- [0015-agent-run-context](0015-agent-run-context.md) +- [0016-python-context-middleware](0016-python-context-middleware.md) diff --git a/docs/decisions/0025-foundry-toolbox-support.md b/docs/decisions/0025-foundry-toolbox-support.md new file mode 100644 index 0000000000..a68b98b3bf --- /dev/null +++ b/docs/decisions/0025-foundry-toolbox-support.md @@ -0,0 +1,454 @@ +--- +status: proposed +contact: evmattso +date: 2026-04-10 +deciders: evmattso +--- + +# Foundry Toolbox Support in FoundryChatClient + +## What is the goal of this feature? + +Enable Agent Framework users to consume Foundry **toolboxes** — named, versioned bundles of tool definitions stored server-side in an Azure AI Foundry project — directly from `FoundryChatClient`, without dropping to the raw `azure-ai-projects` SDK. + +A user who has configured a toolbox in the Foundry portal (or via the raw SDK) should be able to load it into an agent with a single call: + +```python +toolbox = await client.get_toolbox("research_tools") +agent = Agent(client=client, instructions="...", tools=toolbox) +``` + +**Success metric:** an agent can consume a toolbox with no manual handling of version-resolution logic on the user's side. + +## What is the problem being solved? 
+ +`azure-ai-projects==2.1.0a20260409002` ships a new `BetaToolboxesOperations` surface, reachable as `AIProjectClient.beta.toolboxes` on the raw SDK client (and therefore as `FoundryChatClient.project_client.beta.toolboxes` through our wrapper), that lets teams: +- Group related hosted tools (code interpreter, file search, MCP, web search, etc.) under a named toolbox +- Version toolboxes immutably, so agents can pin to a specific configuration for production stability +- Share toolboxes across multiple agents in a project + +However, consuming a toolbox from the framework today requires: +1. Knowing the raw SDK accessor path (`client.project_client.beta.toolboxes`) +2. Making two calls for the common case — `.get(name)` to find the default version, then `.get_version(name, version)` to actually retrieve tools +3. Manually unpacking `toolbox.tools` before passing them to `Agent(tools=...)` + +None of this is hard, but it's the kind of boilerplate that should live in the client. Every other hosted tool in `FoundryChatClient` (code interpreter, file search, web search, image generation, MCP) already has a factory method (`get_code_interpreter_tool()`, etc.). Toolbox support should fit the same shape on the chat-client composition surface. + +## API Changes + +### One new method on the FoundryChatClient surface + +The public toolbox-consumption surface lands on: + +- `RawFoundryChatClient` (inherited by `FoundryChatClient`) in `_chat_client.py` + +The implementation delegates to shared helper functions in `_tools.py` so there is a single source of truth for the SDK calls. + +**Scope note:** `FoundryAgent` is intentionally not part of this design. `FoundryAgent` is the runtime surface for invoking an already-configured server-side Foundry agent; if that agent should use a toolbox, the toolbox/tools should already be configured on the Foundry side (UI or `azure-ai-projects` authoring flow) before MAF connects to it. 
+ +**Scope note:** Authoring a server-side agent whose definition references a toolbox (via `PromptAgentDefinition(tools=toolbox.tools, ...)` + `client.agents.create_version(...)`) is deliberately outside MAF scope. That is an `azure-ai-projects` / service-resource authoring concern, not a future MAF feature. Users who need it should use the raw Azure SDK directly. + +```python +async def get_toolbox( + self, + name: str, + *, + version: str | None = None, +) -> ToolboxVersionObject: + """Fetch a Foundry toolbox by name. + + If ``version`` is ``None``, resolves the toolbox's current default version + (two requests). If ``version`` is specified, fetches that version directly + (single request). + + :param name: The name of the toolbox. + :param version: Optional immutable version identifier to pin to. + :return: A ``ToolboxVersionObject``. Pass its ``tools`` attribute to + ``Agent(tools=toolbox.tools)``. + :raises azure.core.exceptions.ResourceNotFoundError: If the toolbox or + version does not exist. + """ + +``` + +### Return types: raw SDK models, no custom wrappers + +Methods return the `azure.ai.projects.models` types directly: + +- `get_toolbox()` → `ToolboxVersionObject` (has `.name`, `.version`, `.tools`, `.id`, `.created_at`, `.description`, `.metadata`, `.policies`) + +No custom wrapper classes are defined. Returning the SDK types directly: +- Eliminates maintenance overhead of keeping a custom wrapper aligned with SDK changes +- Matches the existing convention — `get_code_interpreter_tool()` returns the raw `CodeInterpreterTool` SDK type +- Means any new fields the SDK adds to these types flow through automatically + +`Agent(..., tools=...)` will accept the fetched toolbox object directly by flattening to `toolbox.tools` internally. + +### Design decisions + +**Instance methods, not `@staticmethod` factories.** Existing `get_code_interpreter_tool()` / `get_mcp_tool()` / etc. are `@staticmethod` because they're pure factories with no network I/O. 
Toolbox fetching requires the project client, so `get_toolbox()` must be an instance method. This is a deliberate departure from the existing-factory pattern, justified by the async-with-I/O nature of the operation. + +**Raw SDK type passthrough (no custom wrappers).** There is only one toolbox type in the Foundry SDK and maintaining a shadow wrapper would create alignment risk as the SDK evolves. The raw `ToolboxVersionObject` and `ToolboxObject` carry all the fields users need. Individual tools inside `toolbox.tools` are the same `azure.ai.projects.models.Tool` subclasses returned by other factory methods. + +**Two-request default-version path.** When `version=None`, the implementation calls `.get(name)` to find `default_version`, then `.get_version(name, default_version)` for the tools. Caching the default-version mapping was considered and rejected — default versions can change server-side via `update(default_version=...)`, and a stale cache would silently give callers the wrong tools. Two requests at agent setup is acceptable. + +**No discovery/listing surface in MAF.** Discovery is intentionally left to the raw `azure-ai-projects` client. MAF does not currently expose project-resource listing surfaces for many other Foundry resources (deployments, vector stores, agents, etc.), so the toolbox design stays narrowly focused on explicit retrieval by name/version. + +**Shared helpers in `_tools.py`.** The SDK-call helper function (`fetch_toolbox`) lives in a shared module so the chat-client surface stays thin and the request logic remains centralized. + +**`tools=toolbox` convenience, not a new wrapper type.** Although `get_toolbox()` returns the raw `ToolboxVersionObject`, Agent Framework can still support `tools=toolbox` / `tools=[toolbox]` by flattening the toolbox's `.tools` internally. 
That matches existing SDK ergonomics where some higher-level objects can be placed directly in `tools=` and unpacked underneath, without introducing a public `FoundryToolbox` wrapper. + +**Errors pass through unchanged.** `ResourceNotFoundError`, `HttpResponseError`, etc. from the SDK propagate as-is. No framework-specific exception hierarchy. + +## E2E Code Samples + +### Primary sample + +New file: `samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py` + +```python +import asyncio + +from agent_framework import Agent +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential + + +async def main() -> None: + client = FoundryChatClient(credential=AzureCliCredential()) + + toolbox = await client.get_toolbox("research_tools") + print(f"Loaded toolbox {toolbox.name}@{toolbox.version} ({len(toolbox.tools)} tools)") + + agent = Agent( + client=client, + instructions="You are a research assistant.", + tools=toolbox, + ) + + result = await agent.run("What are the latest developments in quantum error correction?") + print(f"Result: {result}") + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +### Version pinning + +```python +toolbox = await client.get_toolbox("research_tools", version="v3") +``` + +### Combining multiple toolboxes + +```python +toolbox_a = await client.get_toolbox("research_tools") +toolbox_b = await client.get_toolbox("some_other_tools", version="v3") + +agent = Agent( + client=client, + instructions="...", + tools=[toolbox_a, toolbox_b], +) +``` + +### Combining toolbox tools with locally defined tools + +```python +toolbox = await client.get_toolbox("research_tools") + +def get_internal_metrics(metric_name: str) -> dict: + """Custom tool that reads from an internal dashboard.""" + ... 
+ +agent = Agent( + client=client, + instructions="...", + tools=[get_internal_metrics, toolbox], +) +``` + +### Selecting only some tools from a toolbox + +Developers will not always want to pass the entire toolbox through unchanged. A +small helper in the Foundry package provides local post-fetch selection without +changing the raw return type of `get_toolbox()`. + +```python +from agent_framework.foundry import select_toolbox_tools + +toolbox = await client.get_toolbox("research_tools") + +selected_tools = select_toolbox_tools( + toolbox, + include_names=["githubmcp", "code_interpreter"], +) + +agent = Agent( + client=client, + instructions="Use only the selected toolbox tools.", + tools=selected_tools, +) +``` + +Supported filters: + +```python +from agent_framework.foundry import FoundryHostedToolType, select_toolbox_tools + +selected_tools = select_toolbox_tools( + toolbox, + include_types=["mcp", "code_interpreter"], # type: Collection[FoundryHostedToolType] + exclude_names=["internal_admin_tool"], +) +``` + +Helper signature: + +```python +type FoundryHostedToolType = Literal[ + "code_interpreter", + "file_search", + "image_generation", + "mcp", + "web_search", +] | str + +def select_toolbox_tools( + tools: ToolboxVersionObject | Sequence[Tool | dict[str, Any]], + *, + include_names: Collection[str] | None = None, + exclude_names: Collection[str] | None = None, + include_types: Collection[FoundryHostedToolType] | None = None, + exclude_types: Collection[FoundryHostedToolType] | None = None, + predicate: Callable[[Tool | dict[str, Any]], bool] | None = None, +) -> list[Tool | dict[str, Any]]: + ... +``` + +Normalized name precedence for `include_names` / `exclude_names`: + +1. MCP `server_label` +2. generic tool `name` +3. fallback tool `type` + +This keeps `get_toolbox()` as a thin fetch API and makes selection an explicit, +local post-processing step, while still allowing the ergonomic +`select_toolbox_tools(toolbox, ...)` call shape. 
+ +## Native vs MCP consumption of a Foundry toolbox + +A Foundry toolbox can be consumed two ways. This design adds new implementation work only for the first: + +1. **Native consumption (in scope).** Tools execute inside Foundry's agent runtime. `get_toolbox()` returns the `ToolboxVersionObject` whose `.tools` attribute carries typed tool configs that the runtime interprets server-side. This design is specifically for `FoundryChatClient`-backed local agent composition. + +2. **MCP consumption (already supported through existing MCP abstractions).** A Foundry toolbox can also be exposed as an MCP server. In that case, use the existing `MCPStreamableHTTPTool(name=..., url=...)` — it already handles this path with any chat client (Foundry, OpenAI, Anthropic, etc.). No new Foundry-specific API is needed for MCP-exposed toolboxes in this design. + +### MCPStreamableHTTPTool example for a Foundry toolbox endpoint + +If Foundry gives you an MCP endpoint for the toolbox (for example from the +toolbox details UI / endpoint surface), the existing MCP client path is: + +```python +from agent_framework import Agent, MCPStreamableHTTPTool +from agent_framework.openai import OpenAIChatClient + +toolbox_mcp = MCPStreamableHTTPTool( + name="research_tools", + url="https://<your-toolbox-mcp-endpoint>", +) + +agent = Agent( + client=OpenAIChatClient(), + instructions="You are a research assistant.", + tools=[toolbox_mcp], +) +``` + +This is a different integration shape than `get_toolbox(...).tools`: + +- `get_toolbox(...).tools` = **native Foundry hosted-tool configs** interpreted by the + Foundry runtime +- `MCPStreamableHTTPTool(name=..., url=...)` = **live MCP server connection** to a + toolbox endpoint + +The design in this spec adds first-class support only for the native hosted-tool +path. The MCP path is already served by the framework's existing MCP abstractions. + +These paths are not unified because they have fundamentally different execution models.
Native toolbox tools are declarative configs the Foundry runtime executes; MCP consumption is a live wire protocol to a running server. + +**MCP authentication inside a toolbox** is handled server-side via `project_connection_id` on individual `MCPTool` entries (OAuth connection objects configured in the Foundry project). The client never holds bearer tokens. Consent flow handling (`CONSENT_REQUIRED` → user-visible consent URL) happens during `agent.run()`, not during toolbox fetching — see Non-goals. + +## Testing Strategy + +Unit tests in `packages/foundry/tests/test_toolbox.py` with mocked `project_client.beta.toolboxes`. A single opt-in live round-trip, `test_integration_get_toolbox_round_trip_against_real_project`, is marked `@pytest.mark.integration`; it is skipped by default and only runs when the required Foundry credentials are available. + +Coverage: + +- `get_toolbox(name, version="v3")` — explicit version, single request. Assert `.get` not called, `.get_version` awaited once, returns `ToolboxVersionObject`. +- `get_toolbox(name)` — default-version resolution. Assert `.get` then `.get_version` called in order with correct args. +- Error propagation — `ResourceNotFoundError` from `.get` propagates unchanged. +- Tool passthrough — heterogeneous tool list (`CodeInterpreterTool`, `MCPTool(project_connection_id=...)`) passes through unchanged. Asserts `project_connection_id` survives. +- Agent integration smoke — `tools=toolbox` / `tools=[toolbox]` flatten to the underlying toolbox tools. +- Multiple toolbox composition smoke — `tools=[toolbox_a, toolbox_b]` flattens into a single agent tool list. +- `get_toolbox_tool_name()` — selection-name precedence is MCP `server_label`, then `name`, then `type`. +- `select_toolbox_tools(toolbox, include_names=...)` — selects by normalized tool names directly from a fetched toolbox object. +- `select_toolbox_tools(toolbox, include_types=...)` — selects by tool types with `Literal`-guided IDE completion. 
+- `select_toolbox_tools(..., exclude_names=..., predicate=...)` — supports exclusion + custom predicates. + +Deliberately **not** covered: +- Runtime consent-flow handling for OAuth MCP tools (see Non-goals). +- Toolbox discovery/listing (`list_toolboxes`, `list_toolbox_versions`) — deliberately left to the raw Azure SDK. +- Full CRUD (`create_version`, `update`, `delete`) and server-side agent authoring — see Non-goals. + +Live Foundry API integration is exercised only through the opt-in `@pytest.mark.integration` round-trip noted above; it is not part of the default test run. + +## Framework dependency: `normalize_tools` flattening + +The core `normalize_tools` function in `packages/core/agent_framework/_tools.py` already supports flattening composite tool inputs. Toolbox support extends that behavior so a fetched `ToolboxVersionObject` is treated as a composite tool source and flattened to its `.tools`. + +That enables: + +- `tools=toolbox` +- `tools=[toolbox]` +- `tools=[local_tool, toolbox]` +- `tools=[toolbox_a, toolbox_b]` + +while still keeping `select_toolbox_tools(toolbox.tools, ...)` available for partial selection before the final agent construction step. + +## Telemetry + +Telemetry for toolbox support has two separate goals: + +1. **Observe toolbox API access** — `get_toolbox()` +2. **Observe toolbox usage during agent runs** — when users pass toolbox-derived tools into `Agent(..., tools=...)` + +### Request telemetry for toolbox API access + +When Agent Framework constructs the `AIProjectClient` internally for `FoundryChatClient`, it already sets: + +```python +user_agent=AGENT_FRAMEWORK_USER_AGENT +``` + +That means toolbox API requests made through: + +- `project_client.beta.toolboxes.get(...)` +- `project_client.beta.toolboxes.get_version(...)` + +carry the standard MAF user-agent marker and can be queried in backend request logs the same way as other Foundry SDK calls made through framework-owned clients. 
+ +Important constraint: if the caller passes an already-constructed `project_client`, Agent Framework does **not** mutate it to inject the MAF user-agent. In that case, toolbox API request telemetry reflects whatever user-agent behavior that external client was configured with. + +### Runtime telemetry for toolbox usage on agent runs + +Tool-level telemetry already captures which hosted Foundry tools are available / invoked during agent execution. The remaining gap is **toolbox provenance**: once the user writes `tools=toolbox` (or otherwise flattens the toolbox into tool configs), the framework sees only raw tool configs and no longer knows which toolbox name/version supplied them. + +The design for closing the **client-side** observability gap is **internal provenance tracking**, not user-supplied metadata and not a new public wrapper type. + +#### Provenance model + +Note: this section is still under investigation. + +When `get_toolbox()` returns a `ToolboxVersionObject`, Agent Framework will attach private provenance metadata to: + +- the returned toolbox object +- each tool inside `toolbox.tools` + +Recommended shape (private, internal-only): + +```python +tool._maf_toolbox_sources = [ + { + "id": toolbox.id, + "name": toolbox.name, + "version": toolbox.version, + } +] +``` + +Key properties of this approach: + +- **No new public API surface** — users still work with raw `ToolboxVersionObject` / `ToolboxObject` +- **No user burden** — callers do not need to stamp metadata manually +- **Provenance follows the tool objects** — works with: + - `tools=toolbox.tools` + - `tools=[toolbox_a.tools, toolbox_b.tools]` + - `tools=[*toolbox_a.tools, *toolbox_b.tools]` +- **Private attributes are not serialized** into the actual request payload sent to the model/service, so this metadata does not leak into the tool definition body + +This is intentionally preferred over introducing a new public `FoundryToolbox` wrapper purely for telemetry, 
and preferred over a separate global provenance registry. The provenance lives on the existing tool objects so list-copying and chat-option merging naturally preserve it. + +#### Span enrichment + +When Agent / chat telemetry computes span attributes for a run, it should inspect the final tool list and aggregate the private toolbox provenance from any tool objects that carry it. The aggregated values are then emitted as attributes on the existing run/chat spans. + +Suggested custom attributes: + +- `agent_framework.foundry.toolbox.ids` +- `agent_framework.foundry.toolbox.names` +- `agent_framework.foundry.toolbox.versions` +- or a single compact attribute such as `agent_framework.foundry.toolbox.sources=["research_tools@1","some_other_tools@3"]` + +The single compact `toolbox.sources` form is preferred for initial implementation because it is easy to query and easy to render from combined tool lists. + +#### Scope of telemetry changes + +This design does **not** require new spans. It enriches existing telemetry: + +- toolbox API access continues to rely on request logs + Azure SDK distributed tracing + MAF user-agent +- agent/chat execution spans gain toolbox provenance attributes when toolbox-derived tools are present + +Implementation-wise, this design most likely touches: + +- `packages/foundry/agent_framework_foundry/_tools.py` — to stamp provenance on fetched toolbox objects / tools +- `packages/core/agent_framework/observability.py` — to aggregate provenance into span attributes + +#### Important limitation: no server-side toolbox telemetry solution yet + +Private provenance attached to tool objects is only useful on the client side. It +does **not** go over the wire to the Foundry service because those private fields +are intentionally not serialized into the request payload. 
+ +That means this design can support: + +- local OpenTelemetry / exporter spans emitted by Agent Framework +- local attribution of a run to one or more fetched toolboxes + +but it does **not** solve: + +- server-side request-log attribution of a model/tool run back to a toolbox +- backend/database queries that need the service itself to know "this tool came from toolbox X" + +At the moment, we do not have a satisfactory design for server-side toolbox +telemetry. The service would require additional structured information on the +request, and there is no accepted mechanism in this design yet for projecting +toolbox provenance into a server-visible field/header/metadata shape. + +So the telemetry story in this spec is explicitly limited to **client-side +toolbox telemetry**. Server-side toolbox attribution remains an open question and +requires either: + +- new service/API support, or +- a later framework design for emitting additional server-visible request metadata. + +#### Deliberate non-goals for telemetry + +- No requirement for users to pass explicit toolbox metadata in `default_options["metadata"]` or `run(..., options=...)` +- No new public `FoundryToolbox` wrapper type just to preserve attribution +- No attempted server-side attribution mechanism in this design (for example a custom request header or request metadata field) until there is a validated end-to-end contract for it + +## Non-goals / Future Work + +Explicitly out of scope for this design. Each is a separate design and PR when needed. + +1. **Create/update/delete toolboxes from code.** CRUD is rare in agent consumption flows. Users who need it drop to `client.project_client.beta.toolboxes.create_version(...)`, `.update(...)`, `.delete(...)` directly. + +2. **Server-side agent authoring from toolbox.** Creating a `PromptAgentDefinition(tools=toolbox.tools)` + `client.agents.create_version(...)` is a future feature covering agent authoring from code. 
The toolbox read API provides the building blocks; the authoring helpers are a separate design. + +3. **OAuth consent-flow runtime handling.** When a toolbox contains MCP tools with `project_connection_id` pointing to an OAuth connection, the runtime may return `CONSENT_REQUIRED` mid-run. This is a runtime concern separate from toolbox fetching. + +4. **Live integration tests in the default test run.** This PR ships unit tests plus a single opt-in `@pytest.mark.integration` round-trip (see Testing Strategy); no live Foundry tests run by default. + +5. **Toolbox caching or refresh APIs.** Each `get_toolbox()` call hits the network. Users who want caching wrap the call themselves. diff --git a/docs/features/code_act/dotnet-implementation.md b/docs/features/code_act/dotnet-implementation.md new file mode 100644 index 0000000000..5a2b51ae3a --- /dev/null +++ b/docs/features/code_act/dotnet-implementation.md @@ -0,0 +1,625 @@ +# CodeAct .NET implementation + +This document describes the .NET realization of the CodeAct design in +[`docs/decisions/0024-codeact-integration.md`](../../decisions/0024-codeact-integration.md). + +This document is intentionally focused on the .NET design and public API surface. +The initial public .NET type described here is `HyperlightCodeActProvider`. Future .NET backends, such as Monty, should follow the same conceptual model with their own concrete provider types rather than through a public abstract base class or a public executor parameter. + +## What is the goal of this feature? + +Goals: +- .NET developers can enable CodeAct through an `AIContextProvider`-based integration. +- Developers can configure a provider-owned CodeAct tool set that is separate from the agent's direct tool surface. +- Developers can use the same `execute_code` concept for both tool-enabled CodeAct and a standard code interpreter tool implementation. +- Developers can swap execution backends over time, starting with Hyperlight while keeping room for alternatives. +- Developers can configure execution capabilities such as workspace mounts and outbound network allow lists in a portable way. 
+ +Success Metric: +- .NET samples exist for both a tool-enabled CodeAct mode and a standard interpreter mode. + +Implementation-free outcome: +- A .NET developer can attach a backend-specific CodeAct provider, choose which tools are available inside CodeAct, and configure execution capabilities without rewriting the function invocation loop or ChatClient pipeline. + +## What is the problem being solved? + +The cross-SDK problem statement and decision rationale live in the [ADR](../../decisions/0024-codeact-integration.md). The items below narrow that statement to .NET-specific design concerns: + +- Today, the easiest way to prototype CodeAct in .NET is to manually configure an `AIFunction` and wire instructions — this is fragile and requires understanding internal sandbox lifecycle details. +- There is no first-class .NET design that simultaneously covers Hyperlight-backed CodeAct now, future backend-specific providers, and both tool-enabled and interpreter modes. +- Sandbox capabilities such as mounted file access and outbound network access need a portable configuration model instead of ad hoc backend-specific wiring. +- Approval behavior needs to be explicit and configurable, mapping to .NET's existing `ApprovalRequiredAIFunction` wrapper mechanism. + +## API Changes + +### CodeAct contract + +#### Terminology + +- **CodeAct** is the primary term. +- `execute_code` is the model-facing tool name used by the initial .NET provider in this spec. +- Tool-enabled versus interpreter behavior is derived from the presence of CodeAct-managed tools, not from a separate public profile object. + +#### Provider-owned CodeAct tool registry + +A concrete .NET CodeAct provider owns the set of tools available through `call_tool(...)` inside CodeAct. + +Rules: +- Only tools explicitly configured on the concrete provider instance are available inside CodeAct. 
+- The provider must not infer its CodeAct-managed tool set from the agent's direct tool configuration (`ChatClientAgentOptions.Tools` or `AIContext.Tools`). +- Exclusive versus mixed behavior is achieved by where tools are configured, not by rewriting the agent's direct tool list. + +Implications: +- **CodeAct-only tool**: configured on the concrete CodeAct provider only. +- **Direct-only tool**: configured on the agent only. +- **Tool available both ways**: configured on both the agent and the concrete CodeAct provider. + +#### Managing tools and capabilities after provider construction + +There is no separate runtime setup object in the .NET design. CodeAct tools, file mounts, and outbound network allow-list state are managed directly on the provider through CRUD-style registry methods. + +Preferred pattern: +- `AddTools(params AIFunction[] tools) -> void` +- `GetTools() -> IReadOnlyList<AIFunction>` +- `RemoveTools(params string[] names) -> void` +- `ClearTools() -> void` +- `AddFileMounts(params FileMount[] mounts) -> void` +- `GetFileMounts() -> IReadOnlyList<FileMount>` +- `RemoveFileMounts(params string[] mountPaths) -> void` +- `ClearFileMounts() -> void` +- `AddAllowedDomains(params AllowedDomain[] domains) -> void` +- `GetAllowedDomains() -> IReadOnlyList<AllowedDomain>` +- `RemoveAllowedDomains(params string[] targets) -> void` +- `ClearAllowedDomains() -> void` + +Requirements: +- The provider-owned CodeAct tool registry is keyed by tool name (from `AIFunction.Name`). +- `AddTools(...)` adds new tools and replaces an existing provider-owned registration when the same tool name is added again. +- `GetTools()` returns the provider's current configured CodeAct tool registry. +- `RemoveTools(...)` removes provider-owned CodeAct tools by name. +- `ClearTools()` removes all provider-owned CodeAct tools. +- File mounts are keyed by sandbox mount path. +- `AddFileMounts(...)` adds new file mounts and replaces an existing mount when the same mount path is added again. 
+- `GetFileMounts()` returns the provider's current configured file mounts. +- `RemoveFileMounts(...)` removes file mounts by mount path. +- `ClearFileMounts()` removes all configured file mounts. +- Allowed domains are keyed by normalized target string. +- `AddAllowedDomains(...)` adds allow-list entries and replaces an existing entry when the same target is added again. +- `GetAllowedDomains()` returns the current outbound allow-list entries. +- `RemoveAllowedDomains(...)` removes allow-list entries by target. +- `ClearAllowedDomains()` removes all configured allow-list entries. +- Tool, file-mount, and network-allow-list mutations affect subsequent runs only; runs already in progress keep the snapshot captured at run start. +- The provider must snapshot its effective tool registry and capability state at the start of each run so concurrent execution remains deterministic. + +#### Approval model + +The initial .NET design follows the ADR's bundled approval decision and maps to the existing `ApprovalRequiredAIFunction` wrapper from `Microsoft.Extensions.AI.Abstractions`: + +- The provider exposes a default `ApprovalMode` for `execute_code` (enum: `CodeActApprovalMode.AlwaysRequire` / `CodeActApprovalMode.NeverRequire`). + +Effective `execute_code` approval is computed as follows: + +- If the provider default is `AlwaysRequire`, `execute_code` requires approval. +- If the provider default is `NeverRequire`, the provider evaluates the provider-owned CodeAct tool registry snapshot for that run. + - If every provider-owned CodeAct tool in that snapshot is not an `ApprovalRequiredAIFunction`, `execute_code` does not require approval. + - If any provider-owned CodeAct tool in that snapshot is an `ApprovalRequiredAIFunction`, `execute_code` requires approval, even if the generated code may not call that tool. 
+- When the effective approval resolves to `AlwaysRequire`, the generated `execute_code` function is wrapped in `ApprovalRequiredAIFunction` before being added to the `AIContext.Tools`. +- Provider-owned tool calls made through `call_tool(...)` during that execution run use the approval already determined for `execute_code`. +- Direct-only agent tools are excluded from this calculation. +- File and network capabilities do not create a separate runtime approval check in the initial model; configuring them on the provider is itself the approval for those capabilities. + +This is intentionally conservative and matches the shape of the existing .NET function-tool approval flow, where `ApprovalRequiredAIFunction` signals to the `ChatClientAgent` that user approval is needed before invocation. + +#### Shared execution flow + +On each run: +1. `ProvideAIContextAsync(...)` snapshots the current CodeAct-managed tool registry and capability settings. +2. Computes the effective approval requirement for `execute_code` from the provider default plus the snapshotted tool registry. +3. Builds provider-defined instructions. +4. Builds a run-scoped `execute_code` `AIFunction` from the snapshot (optionally wrapped in `ApprovalRequiredAIFunction`). +5. Returns an `AIContext` containing the instructions and `execute_code` tool. +6. When `execute_code` is invoked by the model, the run-scoped function creates or reuses an execution environment. +7. If the current provider mode exposes host tools, `call_tool(...)` is bound only to the provider-owned tool registry snapshot. +8. Code is executed and results converted to a JSON result string. + +Caching rules: +- The Hyperlight backend supports snapshots: the provider caches a reusable clean snapshot after the first sandbox initialization. +- No mutable per-run execution state may be shared across concurrent runs. +- In-memory interpreter state does not persist across separate `execute_code` calls. 
+- Configured workspace files, mounted files, and any writable artifact/output area are the supported persistence mechanism across calls when the backend exposes them. + +### .NET public API + +#### Core types + +```csharp +/// <summary> +/// Represents a host-to-sandbox file mount configuration. +/// </summary> +/// <param name="HostPath">Absolute or relative path on the host filesystem.</param> +/// <param name="MountPath">Path inside the sandbox (e.g. "/input/data.csv").</param> +public sealed record FileMount(string HostPath, string MountPath); + +/// <summary> +/// Represents an outbound network allow-list entry. +/// </summary> +/// <param name="Target">URL or domain (e.g. "https://api.github.com").</param> +/// <param name="Methods"> +/// Optional HTTP methods to allow (e.g. ["GET", "POST"]). +/// Null allows all methods supported by the backend. +/// </param> +public sealed record AllowedDomain(string Target, IReadOnlyList<string>? Methods = null); + +/// <summary> +/// Controls the approval behavior for execute_code invocations. +/// </summary> +public enum CodeActApprovalMode +{ + /// <summary>execute_code always requires user approval.</summary> + AlwaysRequire, + + /// <summary> + /// Approval is derived from the provider-owned tool registry: + /// if any tool is an ApprovalRequiredAIFunction, execute_code requires approval. + /// </summary> + NeverRequire, +} +``` + +#### HyperlightCodeActProvider + +```csharp +/// <summary> +/// An AIContextProvider that enables CodeAct execution through the +/// Hyperlight sandbox backend. +/// </summary> +/// <remarks> +/// <para> +/// This provider injects an execute_code tool into the model-facing +/// tool surface and builds CodeAct guidance instructions. Guest code executed +/// through execute_code runs in an isolated Hyperlight sandbox with +/// snapshot/restore for clean state per invocation. +/// </para> +/// <para> +/// If no CodeAct-managed tools are configured, the provider uses +/// interpreter-style behavior. If one or more CodeAct-managed tools are +/// configured, the provider uses tool-enabled behavior and exposes +/// call_tool(...) inside the sandbox bound to the configured tools. 
+/// </para> +/// </remarks> +public sealed class HyperlightCodeActProvider : AIContextProvider, IDisposable +{ + /// <summary> + /// Initializes a new HyperlightCodeActProvider. + /// </summary> + /// <param name="options">Configuration options for the provider.</param> + public HyperlightCodeActProvider(HyperlightCodeActProviderOptions options); + + // ----- Tool registry ----- + + /// <summary>Adds tools to the provider-owned CodeAct tool registry.</summary> + public void AddTools(params AIFunction[] tools); + + /// <summary>Returns the current CodeAct-managed tools.</summary> + public IReadOnlyList<AIFunction> GetTools(); + + /// <summary>Removes tools by name from the CodeAct tool registry.</summary> + public void RemoveTools(params string[] names); + + /// <summary>Removes all CodeAct-managed tools.</summary> + public void ClearTools(); + + // ----- File mounts ----- + + /// <summary>Adds file mount configurations.</summary> + public void AddFileMounts(params FileMount[] mounts); + + /// <summary>Returns the current file mount configurations.</summary> + public IReadOnlyList<FileMount> GetFileMounts(); + + /// <summary>Removes file mounts by sandbox mount path.</summary> + public void RemoveFileMounts(params string[] mountPaths); + + /// <summary>Removes all file mount configurations.</summary> + public void ClearFileMounts(); + + // ----- Network allow-list ----- + + /// <summary>Adds outbound network allow-list entries.</summary> + public void AddAllowedDomains(params AllowedDomain[] domains); + + /// <summary>Returns the current outbound allow-list entries.</summary> + public IReadOnlyList<AllowedDomain> GetAllowedDomains(); + + /// <summary>Removes allow-list entries by target.</summary> + public void RemoveAllowedDomains(params string[] targets); + + /// <summary>Removes all outbound allow-list entries.</summary> + public void ClearAllowedDomains(); + + // ----- Lifecycle ----- + + /// <summary>Releases the sandbox and all associated native resources.</summary> + public void Dispose(); +} +``` + +#### HyperlightCodeActProviderOptions + +```csharp +/// <summary> +/// Configuration options for <see cref="HyperlightCodeActProvider"/>. +/// </summary> +public sealed class HyperlightCodeActProviderOptions +{ + /// <summary> + /// The sandbox backend to use. Default is Wasm. 
+ /// </summary> + public SandboxBackend Backend { get; set; } = SandboxBackend.Wasm; + + /// <summary> + /// Path to the guest module (.wasm or .aot file). + /// Required for the Wasm backend; not needed for JavaScript. + /// When null, the provider attempts to locate the default packaged + /// Python guest module. + /// </summary> + public string? ModulePath { get; set; } + + /// <summary> + /// Guest heap size. Accepts human-readable strings ("50Mi", "2Gi") + /// or raw byte values. Null uses the backend default. + /// </summary> + public string? HeapSize { get; set; } + + /// <summary> + /// Guest stack size. Accepts human-readable strings ("35Mi") + /// or raw byte values. Null uses the backend default. + /// </summary> + public string? StackSize { get; set; } + + /// <summary> + /// Initial set of CodeAct-managed tools available inside the sandbox. + /// </summary> + public IEnumerable<AIFunction>? Tools { get; set; } + + /// <summary> + /// Default approval mode for the execute_code tool. + /// Default is <see cref="CodeActApprovalMode.NeverRequire"/>. + /// </summary> + public CodeActApprovalMode ApprovalMode { get; set; } = CodeActApprovalMode.NeverRequire; + + /// <summary> + /// Optional workspace root directory on the host. + /// When set, it is exposed as the sandbox's input directory. + /// </summary> + public string? WorkspaceRoot { get; set; } + + /// <summary> + /// Initial file mount configurations. + /// </summary> + public IEnumerable<FileMount>? FileMounts { get; set; } + + /// <summary> + /// Initial outbound network allow-list entries. + /// </summary> + public IEnumerable<AllowedDomain>? AllowedDomains { get; set; } + + /// <summary> + /// State key used to store provider state in AgentSession.StateBag. + /// Defaults to "HyperlightCodeActProvider". Override when using + /// multiple provider instances on the same agent. + /// </summary> + public string? StateKey { get; set; } +} +``` + +#### Provider implementation contract + +The concrete provider plugs into the existing .NET `AIContextProvider` surface from `Microsoft.Agents.AI.Abstractions`. 
+ +Required override: +- `ProvideAIContextAsync(InvokingContext, CancellationToken) -> ValueTask<AIContext>` + +`ProvideAIContextAsync(...)` is responsible for: +- snapshotting the current CodeAct-managed tool registry and capability settings for the run, +- computing the effective approval requirement for `execute_code` from the provider default and the snapshotted tool registry, +- building a short CodeAct guidance instruction string, +- building a run-scoped `execute_code` `AIFunction` from the snapshot, +- optionally wrapping it in `ApprovalRequiredAIFunction` when approval is required, +- and returning an `AIContext` with `Instructions` and `Tools` set. + +These steps run on every invocation rather than once at construction time because the provider supports CRUD mutations between runs, concurrent runs need independent snapshots, and the effective approval and instructions depend on the tool registry state captured at run start. + +The provider overrides `StateKeys` to return the configured `StateKey` from options, enabling multiple provider instances on the same agent without key collisions. + +Mutating the provider after `ProvideAIContextAsync(...)` has captured a run-scoped snapshot is allowed, but it affects subsequent runs only. Provider implementations synchronize state capture and CRUD operations so shared provider instances remain safe across concurrent runs. + +#### AIFunction-to-sandbox tool bridging + +The Hyperlight sandbox's `RegisterTool(name, Func<string, string>)` accepts a synchronous JSON-in / JSON-out delegate. Provider-owned CodeAct tools are `AIFunction` instances that are async and cancellation-aware. + +Bridging strategy: +- At sandbox initialization time, the provider registers each CodeAct-managed tool with the sandbox using the raw JSON overload: `RegisterTool(name, Func<string, string>)`. +- When the sandbox guest calls `call_tool("name", ...)`, the bridge delegate: + 1. Deserializes the JSON arguments. + 2. 
Invokes `AIFunction.InvokeAsync(...)` synchronously (via `GetAwaiter().GetResult()`) since the sandbox FFI callback is inherently synchronous. + 3. Serializes the result back to JSON. +- This sync-over-async bridge is a known pragmatic trade-off constrained by the Hyperlight FFI boundary. It is safe because: + - Sandbox execution already runs on the thread pool (via `Task.Run`). + - The FFI callback runs on a worker thread with no synchronization context. +- If the Hyperlight .NET SDK later adds async tool registration, the bridge should migrate to that. + +#### Runtime behavior + +- `ProvideAIContextAsync(...)` adds a short CodeAct guidance block through `AIContext.Instructions`. +- `ProvideAIContextAsync(...)` adds `execute_code` through `AIContext.Tools`. +- The detailed `call_tool(...)`, sandbox-tool, and capability guidance is carried by the `execute_code` function's `Description`. +- `execute_code` invokes the configured Hyperlight sandbox guest. +- If the current CodeAct tool registry snapshot is non-empty, the runtime injects `call_tool(...)` bound to the provider-owned tool registry. +- The provider does not inspect or mutate the agent's `ChatClientAgentOptions.Tools` or the incoming `AIContext.Tools` to determine its CodeAct tool set. +- The provider snapshots the current CodeAct tool registry and capability state at run start, so later registry and allow-list mutations only affect future runs. +- Interpreter versus tool-enabled behavior is derived from the presence of CodeAct-managed tools. +- `execute_code` is traced like a normal tool invocation within the surrounding agent run. + +#### Backend integration + +Initial public provider: +- `HyperlightCodeActProvider` + +Backend-specific notes: +- **Hyperlight** + - The provider internally creates a `SandboxBuilder` from the options and uses the `Sandbox` API from `HyperlightSandbox.Api`. 
+ - The provider uses snapshot/restore to ensure clean execution state per `execute_code` invocation: a "warm" snapshot is taken after the first no-op initialization run, and restored before each subsequent execution. + - File access maps to Hyperlight Sandbox's `WithInputDir()` / `WithOutputDir()` / `WithTempOutput()` capability model. + - Network access is denied by default and is enabled through `Sandbox.AllowDomain(...)` per-target allow-list entries. + - Guest module resolution: if `ModulePath` is null for the Wasm backend, the provider attempts to locate a packaged Python guest module (equivalent to the Python SDK's `python_guest.path` resolution). + +#### Capability handling + +Capabilities are first-class `HyperlightCodeActProviderOptions` properties and provider-managed CRUD surfaces: +- `WorkspaceRoot` +- `FileMounts` +- `AllowedDomains` + +Enabling access means: +- Configuring `WorkspaceRoot` or any `FileMounts` enables the sandbox filesystem surface exposed through `/input` and `/output`. +- Leaving both `WorkspaceRoot` and `FileMounts` unset means no filesystem surface is configured. +- Adding any `AllowedDomains` entry enables outbound access only for the configured targets; leaving it empty means network access is disabled without a separate network mode flag. + +Backends may implement stricter semantics than these top-level settings. + +#### Execution output representation + +Backend execution output maps to a JSON result string returned from the `execute_code` `AIFunction`: + +```json +{ + "stdout": "Hello world\n", + "stderr": "", + "exit_code": 0, + "success": true +} +``` + +Execution failures should surface readable error text in the `stderr` field and a non-zero `exit_code`. Timeouts, out-of-memory conditions, backend crashes, and similar sandbox failures are all `execute_code` failures and should surface as structured error results. Partial textual or file outputs may be returned only when the backend can report them unambiguously. 
+ +#### `execute_code` input contract + +```json +{ + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code to execute using the provider's configured backend/runtime behavior." + } + }, + "required": ["code"] +} +``` + +#### Thread safety and concurrency + +- All CRUD methods (`AddTools`, `RemoveTools`, `AddFileMounts`, etc.) are synchronized via an internal lock. +- `ProvideAIContextAsync(...)` acquires the lock to snapshot current state, then releases it before building the run-scoped function. The run-scoped function closes over the immutable snapshot, not mutable provider state. +- Concurrent `execute_code` invocations from different runs use independent sandbox instances or synchronized access to a shared sandbox with snapshot/restore. +- Workspace directories (`WorkspaceRoot`, `FileMounts`) are external shared state: concurrent runs against the same workspace can race on files. This is the user's responsibility to manage (e.g., by using per-run output directories or separate provider instances). + +### HyperlightExecuteCodeFunction + +The provider package also exports a standalone `HyperlightExecuteCodeFunction` for direct-tool scenarios where a provider lifecycle is not needed. This is the .NET equivalent of the Python `HyperlightExecuteCodeTool`. + +```csharp +/// +/// A standalone execute_code AIFunction backed by a Hyperlight sandbox. +/// Use this for manual/static wiring when the AIContextProvider lifecycle +/// is not needed. +/// +public sealed class HyperlightExecuteCodeFunction : IDisposable +{ + /// + /// Creates a new standalone code execution function. + /// + /// Configuration options. + public HyperlightExecuteCodeFunction(HyperlightCodeActProviderOptions options); + + /// + /// Returns this as an AIFunction for direct registration on an agent. + /// When approval is required, the returned function is wrapped in + /// ApprovalRequiredAIFunction. 
+ /// + public AIFunction AsAIFunction(); + + /// + /// Builds a CodeAct instruction string describing the available + /// tools and capabilities. + /// + /// + /// When false, the instructions include full tool descriptions + /// (for use when tools are only accessible through CodeAct). + /// When true, instructions are abbreviated (tools are already + /// visible to the model as direct tools). + /// + public string BuildInstructions(bool toolsVisibleToModel = false); + + /// Releases sandbox resources. + public void Dispose(); +} +``` + +### Internal implementation structure + +The provider and standalone function share internal helpers: + +``` +Microsoft.Agents.AI.Hyperlight/ +├── HyperlightCodeActProvider.cs // AIContextProvider implementation +├── HyperlightCodeActProviderOptions.cs // Options record +├── HyperlightExecuteCodeFunction.cs // Standalone AIFunction for manual wiring +├── FileMount.cs // File mount record +├── AllowedDomain.cs // Network allow-list record +├── CodeActApprovalMode.cs // Approval enum +├── Internal/ +│ ├── SandboxExecutor.cs // Manages sandbox lifecycle, snapshot/restore +│ ├── InstructionBuilder.cs // Builds CodeAct instruction strings +│ └── ToolBridge.cs // AIFunction ↔ Sandbox.RegisterTool adapter +``` + +`SandboxExecutor` encapsulates: +- Creating and configuring a `Sandbox` from options. +- Performing the initial no-op warm-up and snapshot. +- Registering bridged tools via `ToolBridge`. +- Restoring to the clean snapshot before each execution. +- Translating `ExecutionResult` to a JSON string. + +`InstructionBuilder` generates: +- A short CodeAct guidance block for `AIContext.Instructions`. +- A detailed `execute_code` description including `call_tool(...)` signatures and capability documentation. + +`ToolBridge` handles: +- Reflecting `AIFunction` metadata to build the sandbox tool registration. +- The sync-over-async invocation bridge. 
+ +## E2E Code Samples + +### Tool-enabled CodeAct mode + +```csharp +var fetchDocs = AIFunctionFactory.Create(FetchDocs, name: "fetch_docs"); +var queryData = AIFunctionFactory.Create(QueryData, name: "query_data"); +var lookupUser = AIFunctionFactory.Create(LookupUser, name: "lookup_user"); + +var codeact = new HyperlightCodeActProvider(new HyperlightCodeActProviderOptions +{ + Tools = [fetchDocs, queryData], + WorkspaceRoot = "./workdir", + AllowedDomains = [new AllowedDomain("api.github.com", ["GET"])], +}); +codeact.AddTools(lookupUser); + +var sendEmail = AIFunctionFactory.Create(SendEmail, name: "send_email"); + +var agent = chatClient.AsAIAgent( + instructions: "You are a helpful assistant.", + options: new ChatClientAgentOptions + { + Tools = [sendEmail], // direct-only tool + AIContextProviders = [codeact], + }); + +await using var session = await agent.CreateSessionAsync(); +var response = await agent.InvokeAsync("Analyze the latest docs", session); +``` + +### Standard code interpreter mode + +```csharp +var codeact = new HyperlightCodeActProvider(new HyperlightCodeActProviderOptions +{ + WorkspaceRoot = "./data", +}); + +var agent = chatClient.AsAIAgent( + instructions: "You are a code interpreter.", + options: new ChatClientAgentOptions + { + AIContextProviders = [codeact], + }); +``` + +### Manual static wiring (no provider lifecycle) + +When the tool registry and capability configuration are fixed, the provider lifecycle can be skipped entirely. 
Build the `execute_code` function and instructions once and pass them directly to the agent: + +```csharp +using var executeCode = new HyperlightExecuteCodeFunction( + new HyperlightCodeActProviderOptions + { + Tools = [fetchDocs, queryData], + WorkspaceRoot = "./workdir", + AllowedDomains = [new AllowedDomain("api.github.com", ["GET"])], + }); + +var codeactInstructions = executeCode.BuildInstructions(toolsVisibleToModel: false); + +var agent = chatClient.AsAIAgent( + instructions: $"You are a helpful assistant.\n\n{codeactInstructions}", + options: new ChatClientAgentOptions + { + Tools = [sendEmail, executeCode.AsAIFunction()], + }); +``` + +### With approval required + +```csharp +var sensitiveAction = new ApprovalRequiredAIFunction( + AIFunctionFactory.Create(DeleteRecords, name: "delete_records")); + +var codeact = new HyperlightCodeActProvider(new HyperlightCodeActProviderOptions +{ + Tools = [fetchDocs, sensitiveAction], // sensitiveAction triggers approval +}); + +// execute_code will be wrapped in ApprovalRequiredAIFunction because +// at least one managed tool (delete_records) requires approval. +var agent = chatClient.AsAIAgent( + instructions: "You are a helpful assistant.", + options: new ChatClientAgentOptions + { + AIContextProviders = [codeact], + }); +``` + +## Relationship to hyperlight-sandbox .NET SDK + +This design depends on the .NET SDK being added in [hyperlight-dev/hyperlight-sandbox#46](https://github.com/hyperlight-dev/hyperlight-sandbox/pull/46). 
Key types consumed from that SDK: + +| hyperlight-sandbox type | Used for | +|---|---| +| `Sandbox` | Core sandbox lifecycle: `Run()`, `RegisterTool()`, `AllowDomain()`, `Snapshot()`, `Restore()` | +| `SandboxBuilder` | Fluent sandbox construction from provider options | +| `SandboxBackend` | Backend selection (Wasm, JavaScript) | +| `ExecutionResult` | Capturing stdout, stderr, exit code from guest execution | +| `SandboxSnapshot` | Checkpoint/restore for clean state per execution | + +The provider package (`Microsoft.Agents.AI.Hyperlight`) takes a NuGet dependency on `Hyperlight.HyperlightSandbox.Api` and `Microsoft.Extensions.AI.Abstractions`. It does **not** depend on `HyperlightSandbox.Extensions.AI` (`CodeExecutionTool`) — the provider implements its own sandbox lifecycle management with run-scoped snapshots to support concurrent invocations safely. + +## Package structure + +The CodeAct Hyperlight provider ships as an optional NuGet package: +- **Package**: `Microsoft.Agents.AI.Hyperlight` +- **Dependencies**: + - `Microsoft.Agents.AI.Abstractions` (for `AIContextProvider`, `AIContext`) + - `Microsoft.Extensions.AI.Abstractions` (for `AIFunction`, `ApprovalRequiredAIFunction`) + - `Hyperlight.HyperlightSandbox.Api` (for sandbox API) +- **Target framework**: `net8.0` + +This keeps CodeAct and its native sandbox dependencies optional — users who do not need CodeAct do not take on the Hyperlight installation and dependency footprint. + +## Open questions + +1. **Guest module distribution**: How should the default Python guest module (`.aot` file) be distributed for .NET consumers? Options include a separate NuGet package with native assets, a runtime download, or requiring users to build/provide their own. +2. **Async tool registration**: If the Hyperlight .NET SDK adds async tool callback support in a future release, the sync-over-async bridge should be replaced. This is tracked as a known technical debt item. +3. 
**Output file access**: The Hyperlight sandbox exposes `GetOutputFiles()` and `OutputPath` for retrieving files written by guest code. The initial design returns these as part of the JSON result. A future iteration could surface output files as framework-native content (e.g., `DataContent` or URI references). +4. **Multiple sandbox instances for concurrency**: The current design uses synchronized access to a single sandbox with snapshot/restore. An alternative pooling strategy (one sandbox per concurrent run) could improve throughput at the cost of memory. This is deferred to implementation time. diff --git a/docs/features/code_act/python-implementation.md b/docs/features/code_act/python-implementation.md new file mode 100644 index 0000000000..7f45190d33 --- /dev/null +++ b/docs/features/code_act/python-implementation.md @@ -0,0 +1,385 @@ +# CodeAct Python implementation + +This document describes the Python realization of the CodeAct design in +[`docs/decisions/0024-codeact-integration.md`](../../decisions/0024-codeact-integration.md). + +This document is intentionally focused on the Python design and public API surface. +The initial public Python type described here is `HyperlightCodeActProvider`. Future Python backends, such as Monty, should follow the same conceptual model with their own concrete provider types rather than through a public abstract base class or a public executor parameter. + +## What is the goal of this feature? + +Goals: +- Python developers can enable CodeAct through a `ContextProvider`-based integration. +- Developers can configure a provider-owned CodeAct tool set that is separate from the agent's direct `tools=` surface. +- Developers can use the same `execute_code` concept for both tool-enabled CodeAct and a standard code interpreter tool implementation. +- Developers can swap execution backends over time, starting with Hyperlight while keeping room for alternatives such as Pydantic's Monty. 
+- Developers can configure execution capabilities such as workspace mounts and outbound network allow lists in a portable way. + +Success Metric: +- Python samples exist for both a tool-enabled CodeAct mode and a standard interpreter mode. + +Implementation-free outcome: +- A Python developer can attach a backend-specific CodeAct provider, choose which tools are available inside CodeAct, and configure execution capabilities without rewriting the function invocation loop. + +## What is the problem being solved? + +The cross-SDK problem statement and decision rationale live in the [ADR](../../decisions/0024-codeact-integration.md). The items below narrow that statement to Python-specific design concerns: + +- Today, the easiest way to prototype CodeAct is to infer or reshape the agent's direct tool surface, which is fragile and hard to reason about. +- In Python, inferring a CodeAct tool surface from generic agent tool configuration is fragile and hard to reason about. +- There is no first-class Python design that simultaneously covers Hyperlight-backed CodeAct now, future backend-specific providers such as Monty, and both tool-enabled and interpreter modes. +- Sandbox capabilities such as mounted file access and outbound network access need a portable configuration model instead of ad hoc backend-specific wiring. +- Approval behavior needs to be explicit and configurable, especially when CodeAct and direct tool calling may both be available. + +## API Changes + +### CodeAct contract + +#### Terminology + +- **CodeAct** is the primary term. +- **Code mode**, **codemode**, and **programmatic tool calling** refer to the same concept in this document. +- `execute_code` is the model-facing tool name used by the initial Python providers in this spec. + +#### Provider-owned CodeAct tool registry + +A concrete Python CodeAct provider owns the set of tools available through `call_tool(...)` inside CodeAct. 
+ +Rules: +- Only tools explicitly configured on the concrete provider instance are available inside CodeAct. +- The provider must not infer its CodeAct-managed tool set from the agent's direct `tools=` configuration. +- Exclusive versus mixed behavior is achieved by where tools are configured, not by rewriting the agent's direct tool list. + +Implications: +- **CodeAct-only tool**: configured on the concrete CodeAct provider only. +- **Direct-only tool**: configured on the agent only. +- **Tool available both ways**: configured on both the agent and the concrete CodeAct provider. + +#### Managing tools and capabilities after provider construction + +There is no separate runtime setup object in the Python design. CodeAct tools, file mounts, and outbound network allow-list state are managed directly on the provider through CRUD-style registry methods. + +Preferred pattern: +- `add_tools(...) -> None` +- `get_tools() -> Sequence[ToolTypes]` +- `remove_tool(...) -> None` +- `clear_tools() -> None` +- `add_file_mounts(...) -> None` +- `get_file_mounts() -> Sequence[FileMount]` +- `remove_file_mount(...) -> None` +- `clear_file_mounts() -> None` +- `add_allowed_domains(...) -> None` +- `get_allowed_domains() -> Sequence[AllowedDomain]` +- `remove_allowed_domain(...) -> None` +- `clear_allowed_domains() -> None` + +Requirements: +- The provider-owned CodeAct tool registry is keyed by tool name. +- `add_tools(...)` adds new tools and replaces an existing provider-owned registration when the same tool name is added again. +- `get_tools()` returns the provider's current configured CodeAct tool registry. +- `remove_tool(...)` removes provider-owned CodeAct tools by name. +- `clear_tools()` removes all provider-owned CodeAct tools. +- File mounts are keyed by sandbox mount path. +- `add_file_mounts(...)` adds new file mounts and replaces an existing mount when the same mount path is added again. +- `get_file_mounts()` returns the provider's current configured file mounts. 
+- `remove_file_mount(...)` removes file mounts by mount path. +- `clear_file_mounts()` removes all configured file mounts. +- Allowed domains are keyed by normalized target string. +- `add_allowed_domains(...)` adds allow-list entries and replaces an existing entry when the same target is added again. +- `get_allowed_domains()` returns the current outbound allow-list entries. +- `remove_allowed_domain(...)` removes allow-list entries by target. +- `clear_allowed_domains()` removes all configured allow-list entries. +- Tool, file-mount, and network-allow-list mutations affect subsequent runs only; runs already in progress keep the snapshot captured at run start. +- The provider must snapshot its effective tool registry and capability state at the start of each run so concurrent execution remains deterministic. + +#### Approval model + +The initial Python design follows the ADR's initial approval decision and reuses the existing tool approval vocabulary from `agent_framework._tools`: + +- `approval_mode="always_require"` +- `approval_mode="never_require"` + +The provider exposes a default `approval_mode` for `execute_code`. + +Effective `execute_code` approval is computed as follows: + +- If the provider default is `always_require`, `execute_code` requires approval. +- If the provider default is `never_require`, the provider evaluates the provider-owned CodeAct tool registry snapshot for that run. +- If every provider-owned CodeAct tool in that snapshot is `never_require`, `execute_code` is `never_require`. +- If any provider-owned CodeAct tool in that snapshot is `always_require`, `execute_code` is `always_require`, even if the generated code may not call that tool. +- Provider-owned tool calls made through `call_tool(...)` during that execution run use the approval already determined for `execute_code`. +- Direct-only agent tools are excluded from this calculation. 
+- File and network capabilities do not create a separate runtime approval check in the initial model; configuring them on the provider, including adding file mounts or outbound network allow-list entries, is itself the approval for those capabilities. + +This is intentionally conservative and matches the shape of the current function-tool approval flow, where `FunctionTool` uses `always_require` / `never_require` and the auto-invocation loop escalates the whole batch if any called tool requires approval. + +If one sensitive provider-owned tool causes `execute_code` to require approval more often than desired, the mitigation is to keep that tool direct-only or expose it through a different CodeAct provider/tool surface. The initial model does not try to infer whether generated code will actually call that tool before approval. + +If the framework later standardizes pre-execution inspection or nested per-tool approvals, the Python provider surface can grow to expose that explicitly. The initial design does not assume that those extra modes are required. + +#### Shared execution flow + +On each run: +1. Resolve the provider's backend/runtime behavior, capabilities, provider default `approval_mode`, and provider-owned tool registry. +2. Compute the effective approval requirement for `execute_code` from the provider default plus the provider-owned tool registry snapshot. +3. Build provider-defined instructions. +4. Add `execute_code` to the model-facing tool surface. +5. Invoke the underlying model. +6. When `execute_code` is called, create or reuse an execution environment keyed by provider type, backend setup identity, capability configuration, and provider-owned tool signature. +7. If the current provider mode exposes host tools, expose `call_tool(...)` bound only to the provider-owned tool registry. +8. Execute code and convert results to framework-native content objects. + +Caching rules: +- Backends that support snapshots may cache a reusable clean snapshot. 
+- Backends that do not support snapshots may still cache warm initialization artifacts. +- No mutable per-run execution state may be shared across concurrent runs. +- In-memory interpreter state does not persist across separate `execute_code` calls. +- Configured workspace files, mounted files, and any writable artifact/output area are the supported persistence mechanism across calls when the backend exposes them. + +### Python public API + +#### Core types + +```python +class FileMount(NamedTuple): + host_path: str | Path + mount_path: str + +FileMountInput = str | tuple[str | Path, str] | FileMount + + +class AllowedDomain(NamedTuple): + target: str + methods: tuple[str, ...] | None = None + + +AllowedDomainInput = str | tuple[str, str | Sequence[str]] | AllowedDomain + + +class HyperlightCodeActProvider(ContextProvider): + def __init__( + self, + source_id: str = "hyperlight_codeact", + *, + backend: str = "wasm", + module: str | None = "python_guest.path", + module_path: str | None = None, + tools: ToolTypes | None = None, + approval_mode: Literal["always_require", "never_require"] = "never_require", + workspace_root: Path | None = None, + file_mounts: Sequence[FileMountInput] = (), + allowed_domains: Sequence[AllowedDomainInput] = (), + ) -> None: ... + + def add_tools(self, tools: ToolTypes | Sequence[ToolTypes]) -> None: ... + def get_tools(self) -> Sequence[ToolTypes]: ... + def remove_tool(self, name: str) -> None: ... + def clear_tools(self) -> None: ... + def add_file_mounts(self, mounts: FileMountInput | Sequence[FileMountInput]) -> None: ... + def get_file_mounts(self) -> Sequence[FileMount]: ... + def remove_file_mount(self, mount_path: str) -> None: ... + def clear_file_mounts(self) -> None: ... + def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: ... + def get_allowed_domains(self) -> Sequence[AllowedDomain]: ... + def remove_allowed_domain(self, domain: str) -> None: ... 
+ def clear_allowed_domains(self) -> None: ... +``` + +`file_mounts` accepts three equivalent input forms: +- `"data/report.csv"` uses the same relative path on the host and in the sandbox. +- `("fixtures/users.json", "data/users.json")` or `(Path("fixtures/users.json"), "data/users.json")` uses distinct host and sandbox paths. +- `FileMount(Path("fixtures/users.json"), "data/users.json")` is the named-tuple form of the explicit pair. + +`allowed_domains` accepts three equivalent input forms: +- `"github.com"` allows that target with all backend-supported methods. +- `("github.com", "GET")` or `("github.com", ["GET", "HEAD"])` uses an explicit per-target method list. +- `AllowedDomain("github.com", ("GET", "HEAD"))` is the named-tuple form of the explicit entry. + +No public abstract `CodeActContextProvider` base or public `executor=` parameter is required for the initial Python API. + +The initial alpha package also exports a standalone `HyperlightExecuteCodeTool` +for direct-tool scenarios where a provider is not needed. That standalone tool +should advertise `call_tool(...)`, the registered sandbox tools, and capability +state through its own `description` rather than requiring separate agent +instructions. + +Provider modes: +- If no CodeAct-managed tools are configured, `HyperlightCodeActProvider` uses interpreter-style behavior. +- If one or more CodeAct-managed tools are configured, `HyperlightCodeActProvider` uses tool-enabled behavior. + +#### Python provider implementation contract + +The concrete provider plugs into the existing Python `ContextProvider` surface from `agent_framework._sessions`. 
+ +The Hyperlight package also depends on a small set of core hooks that must remain available from `agent-framework-core`: +- `ContextProvider.before_run(...)` +- `SessionContext.extend_instructions(...)` +- `SessionContext.extend_tools(...)` +- per-run runtime tool access via `SessionContext.options["tools"]` +- the shared `ApprovalMode` vocabulary used by `FunctionTool` + +Required lifecycle hook: +- `before_run(*, agent, session, context, state) -> None` + +Optional lifecycle hook: +- `after_run(*, agent, session, context, state) -> None` + +`before_run(...)` is responsible for: +- snapshotting the current CodeAct-managed tool registry and capability settings for the run, +- computing the effective approval requirement for `execute_code` from the provider default and the snapshotted tool registry, +- adding a short CodeAct guidance block, +- adding `execute_code` to the run through `SessionContext.extend_tools(...)`, +- and wiring any backend-specific execution state needed for the run. + +These steps run on every invocation rather than once at construction time because the provider supports CRUD mutations between runs, concurrent runs need independent snapshots, and the effective approval and instructions depend on the tool registry state captured at run start. When the tool registry and capability configuration are fixed for the lifetime of the agent, the manual wiring pattern (see `codeact_manual_wiring.py`) can be used instead, which passes the tool and instructions directly to the `Agent` constructor and avoids the per-run provider lifecycle entirely. + +If the provider stores anything in `state`, that value must stay JSON-serializable. + +Mutating the provider after `before_run(...)` has captured a run-scoped snapshot is allowed, but it affects subsequent runs only. Provider implementations should synchronize state capture and CRUD operations so shared provider instances remain safe across concurrent runs. 
+ +`after_run(...)` is responsible for any backend-specific cleanup or post-processing that must happen after the model invocation completes. + +If shared internal helpers are introduced later for multiple concrete providers, they should standardize responsibilities for: +- building instructions, +- computing effective approval, +- configuring file access, +- configuring network access, +- preparing or restoring execution state, +- executing code, +- and converting backend output into framework-native `Content`. + +#### Runtime behavior + +- `before_run(...)` adds a short CodeAct guidance block through `SessionContext.extend_instructions(...)`. +- `before_run(...)` adds `execute_code` through `SessionContext.extend_tools(...)`. +- The detailed `call_tool(...)`, sandbox-tool, and capability guidance is carried by `execute_code.description`. +- `execute_code` invokes the configured Hyperlight sandbox guest. +- If the current CodeAct tool registry is non-empty, the runtime injects `call_tool(...)` bound to the provider-owned tool registry. +- The provider does not inspect or mutate `Agent.default_options["tools"]` or `context.options["tools"]` to determine its CodeAct tool set. +- The provider snapshots the current CodeAct tool registry and capability state at run start, so later registry and allow-list mutations only affect future runs. +- Interpreter versus tool-enabled behavior is derived from the concrete provider and the presence of CodeAct-managed tools, not from a separate public profile object. +- `execute_code` should be traced like a normal tool invocation within the surrounding agent run, and provider-owned tool calls executed through `call_tool(...)` should continue to emit ordinary tool invocation telemetry. 
+ +#### Backend integration + +Initial public provider: +- `HyperlightCodeActProvider` + +Backend-specific notes: +- **Hyperlight** + - Provider construction needs a guest artifact via `module`, which may be a packaged guest module name or a path to a compiled guest artifact. + - File access maps naturally to Hyperlight Sandbox's read-only `/input` and writable `/output` capability model. + - Network access is denied by default and is enabled through per-target allow-list entries. +- **Monty** + - A future `MontyCodeActProvider` should be a separate public type rather than a `HyperlightCodeActProvider` mode. + - Monty does not expose built-in filesystem or network access directly inside the interpreter. + - File and URL access are mediated through host-provided external functions, so a Monty provider would need to translate provider settings into virtual files and allow-checked callbacks. + - Monty setup may also include backend-specific inputs such as `script_name`, optional type-check stubs, or restored snapshots. + +#### Capability handling + +Capabilities are first-class `HyperlightCodeActProvider` init parameters and provider-managed CRUD surfaces: +- `workspace_root` +- `file_mounts` +- `allowed_domains` + +Concrete providers should normalize these settings internally. Hyperlight can map them directly to sandbox capabilities, while Monty must enforce them through host-mediated file and network functions and may apply stricter URL-level checks than the public provider surface expresses. + +Expected management split: +- `workspace_root` remains a direct configuration value on the provider, +- file mounts are managed through provider CRUD methods, +- outbound allow-list entries are managed through provider CRUD methods. + +Enabling access means: +- Configuring `workspace_root` or any `file_mounts` enables the sandbox filesystem surface exposed through `/input` and `/output`. 
+- Leaving both `workspace_root` and `file_mounts` unset means no filesystem surface is configured. +- Adding any `allowed_domains` entry enables outbound access only for the configured targets; leaving it empty means network access is disabled without a separate `network_mode` flag. +- A string target allows all backend-supported methods for that target; an explicit tuple or `AllowedDomain` entry narrows the methods for that target. + +Backends may implement stricter semantics than these top-level settings. For example, Hyperlight naturally maps file access to `/input` and `/output`, while Monty would enforce equivalent policy through host-provided callbacks rather than direct interpreter I/O. + +#### Execution output representation + +Backend execution output should be translated into existing AF `Content` values rather than a custom `CodeActExecutionResult` type. + +Use the existing content model from `agent_framework._types`, for example: +- `Content.from_code_interpreter_tool_result(outputs=[...])` to surface the overall result of sandboxed code execution, +- `Content.from_text(...)` for plain textual output, +- `Content.from_data(...)` or `Content.from_uri(...)` for generated files or binary artifacts, +- `Content.from_error(...)` for execution failures, +- and `Content.from_function_result(..., result=list[Content])` when surfacing the final result of `execute_code` through the normal tool result path. + +#### `execute_code` input contract + +```json +{ + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code to execute using the provider's configured backend/runtime behavior." + } + }, + "required": ["code"] +} +``` + +Execution failures should surface readable error text and structured error `Content`, not a custom backend result object. + +Timeouts, out-of-memory conditions, backend crashes, and similar sandbox failures are all `execute_code` failures and should surface as structured error content. 
Partial textual or file outputs may be returned only when the backend can report them unambiguously; callers should not rely on partial-output recovery as a portable contract. + +## E2E Code Samples + +### Tool-enabled CodeAct mode + +```python +codeact = HyperlightCodeActProvider( + tools=[fetch_docs, query_data], + workspace_root="./workdir", + allowed_domains=[("api.github.com", "GET")], +) +codeact.add_tools([lookup_user]) + +agent = Agent( + client=client, + name="assistant", + tools=[send_email], # direct-only tool + context_providers=[codeact], +) +``` + +### Standard code interpreter mode + +```python +codeact = HyperlightCodeActProvider( + workspace_root="./data", +) + +agent = Agent( + client=client, + name="interpreter", + context_providers=[codeact], +) +``` + +### Manual static wiring (no per-run provider lifecycle) + +When the tool registry and capability configuration are fixed, the provider lifecycle can be skipped entirely. Build the `execute_code` tool and instructions once and pass them directly to the agent: + +```python +execute_code = HyperlightExecuteCodeTool( + tools=[fetch_docs, query_data], + workspace_root="./workdir", + allowed_domains=[("api.github.com", "GET")], + approval_mode="never_require", +) + +codeact_instructions = execute_code.build_instructions(tools_visible_to_model=False) + +agent = Agent( + client=client, + name="assistant", + instructions=f"You are a helpful assistant.\n\n{codeact_instructions}", + tools=[send_email, execute_code], +) +``` diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 6817ac3fe0..63800e8a33 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -7,13 +7,16 @@ - 13.0.2 + 13.1.0 + + + @@ -48,12 +51,12 @@ - - - - - - + + + + + + @@ -71,18 +74,18 @@ - - - - - - + + + + + + - + - + - + diff --git a/dotnet/agent-framework-dotnet.slnx b/dotnet/agent-framework-dotnet.slnx index 00a1882018..96c2411e3b 100644 --- 
a/dotnet/agent-framework-dotnet.slnx +++ b/dotnet/agent-framework-dotnet.slnx @@ -4,6 +4,9 @@ + + + @@ -37,6 +40,12 @@ + + + + + + @@ -152,6 +161,7 @@ + @@ -173,6 +183,7 @@ + @@ -540,6 +551,7 @@ + diff --git a/dotnet/agent-framework-release.slnf b/dotnet/agent-framework-release.slnf index 98f9dcf887..8cc97dc8cb 100644 --- a/dotnet/agent-framework-release.slnf +++ b/dotnet/agent-framework-release.slnf @@ -28,7 +28,8 @@ "src\\Microsoft.Agents.AI.Workflows.Declarative\\Microsoft.Agents.AI.Workflows.Declarative.csproj", "src\\Microsoft.Agents.AI.Workflows.Generators\\Microsoft.Agents.AI.Workflows.Generators.csproj", "src\\Microsoft.Agents.AI.Workflows\\Microsoft.Agents.AI.Workflows.csproj", - "src\\Microsoft.Agents.AI\\Microsoft.Agents.AI.csproj" + "src\\Microsoft.Agents.AI\\Microsoft.Agents.AI.csproj", + "src\\Aspire.Hosting.AgentFramework.DevUI\\Aspire.Hosting.AgentFramework.DevUI.csproj" ] } } diff --git a/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Agent_OpenAI_Step06_CodeInterpreterFileDownload.csproj b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Agent_OpenAI_Step06_CodeInterpreterFileDownload.csproj new file mode 100644 index 0000000000..06380e8016 --- /dev/null +++ b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Agent_OpenAI_Step06_CodeInterpreterFileDownload.csproj @@ -0,0 +1,15 @@ + + + + Exe + net10.0 + + enable + enable + + + + + + + diff --git a/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Program.cs b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Program.cs new file mode 100644 index 0000000000..c01ff4304e --- /dev/null +++ b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/Program.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +// This sample shows how to download files generated by Code Interpreter using the Containers API. +// Code Interpreter generates files inside containers (cfile_ / cntr_ IDs) which cannot be +// downloaded via the standard Files API. Use ContainerClient instead. + +#pragma warning disable OPENAI001 + +using System.ClientModel; +using Microsoft.Agents.AI; +using Microsoft.Extensions.AI; +using OpenAI; +using OpenAI.Containers; +using OpenAI.Responses; + +string apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("OPENAI_API_KEY is not set."); +string model = Environment.GetEnvironmentVariable("OPENAI_CHAT_MODEL_NAME") ?? "gpt-4o-mini"; + +var openAIClient = new OpenAIClient(new ApiKeyCredential(apiKey)); + +// Create an agent with Code Interpreter tool enabled +AIAgent agent = openAIClient + .GetResponsesClient() + .AsAIAgent( + model: model, + instructions: "You are a helpful assistant that can generate files using code.", + name: "CodeInterpreterAgent", + tools: [new HostedCodeInterpreterTool()]); + +// Ask the agent to generate a file +AgentResponse response = await agent.RunAsync( + "Create a CSV file with the multiplication times tables from 1 to 12. 
Include headers."); + +// Display the text response +foreach (TextContent textContent in response.Messages.SelectMany(x => x.Contents).OfType()) +{ + Console.WriteLine(textContent.Text); +} + +// Extract container file citations from response annotations and download +ContainerClient containerClient = openAIClient.GetContainerClient(); + +HashSet downloadedFiles = []; +bool foundContainerFiles = false; + +foreach (AIContent content in response.Messages.SelectMany(x => x.Contents)) +{ + if (content.Annotations is null) + { + continue; + } + + foreach (AIAnnotation annotation in content.Annotations) + { + // Container files from Code Interpreter have ContainerFileCitationMessageAnnotation as raw representation + if (annotation is CitationAnnotation citation + && citation.RawRepresentation is ContainerFileCitationMessageAnnotation containerCitation) + { + foundContainerFiles = true; + + // Deduplicate by container+file ID in case the same file is cited multiple times + string key = $"{containerCitation.ContainerId}/{containerCitation.FileId}"; + if (!downloadedFiles.Add(key)) + { + continue; + } + + Console.WriteLine($"\nDownloading container file: {containerCitation.Filename}"); + Console.WriteLine($" Container ID: {containerCitation.ContainerId}"); + Console.WriteLine($" File ID: {containerCitation.FileId}"); + + BinaryData fileData = await containerClient.DownloadContainerFileAsync( + containerCitation.ContainerId, + containerCitation.FileId); + + // Sanitize filename to prevent path traversal + string safeFilename = Path.GetFileName(containerCitation.Filename); + string outputPath = Path.Combine(Directory.GetCurrentDirectory(), safeFilename); + await File.WriteAllBytesAsync(outputPath, fileData.ToArray()); + Console.WriteLine($" Saved to: {outputPath}"); + } + } +} + +if (!foundContainerFiles) +{ + Console.WriteLine("\nNo container file citations found in the response."); + Console.WriteLine("The model may not have generated a downloadable file for this prompt."); 
+} diff --git a/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/README.md b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/README.md new file mode 100644 index 0000000000..4ba457d0f8 --- /dev/null +++ b/dotnet/samples/02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/README.md @@ -0,0 +1,51 @@ +# Code Interpreter File Download (OpenAI) + +This sample demonstrates how to download files generated by Code Interpreter when using the OpenAI Responses API. + +## What this sample demonstrates + +- Creating an agent with Code Interpreter tool using `ResponsesClient.AsAIAgent()` +- Generating files through Code Interpreter (e.g., CSV, Excel, images) +- Extracting container file citations from agent response annotations +- Downloading container files using the `ContainerClient` API + +## Container files vs regular files + +When Code Interpreter generates a file, the file is stored inside a **container** with a `cntr_` prefixed ID. The file itself gets a `cfile_` prefixed ID. + +These container files **cannot** be downloaded using the standard Files API (`GetOpenAIFileClient`), which returns 404 for `cfile_` IDs. Instead, you must use the **Containers API** (`GetContainerClient`) to download them: + +```csharp +// ❌ This does NOT work for container files +var filesClient = openAIClient.GetOpenAIFileClient(); +await filesClient.DownloadFileAsync("cfile_..."); // Returns 404 + +// ✅ Use ContainerClient instead +var containerClient = openAIClient.GetContainerClient(); +await containerClient.DownloadContainerFileAsync("cntr_...", "cfile_..."); +``` + +The container ID and file ID are available from the `ContainerFileCitationMessageAnnotation` annotation in the response, accessible via `CitationAnnotation.RawRepresentation`. 
+ +## Prerequisites + +- .NET 10 SDK or later +- OpenAI API key with access to a model that supports Code Interpreter + +Set the following environment variables: + +```powershell +$env:OPENAI_API_KEY="sk-..." +$env:OPENAI_CHAT_MODEL_NAME="gpt-4o-mini" # Optional, defaults to gpt-4o-mini +``` + +## Run the sample + +```powershell +dotnet run +``` + +## See also + +- [Code Interpreter File Download with Foundry](../../../02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/) — same scenario using Microsoft Foundry +- [Code Interpreter](../../../02-agents/AgentsWithFoundry/Agent_Step14_CodeInterpreter/) — Code Interpreter without file download diff --git a/dotnet/samples/02-agents/AgentWithOpenAI/README.md b/dotnet/samples/02-agents/AgentWithOpenAI/README.md index 74a44600bf..78955a72af 100644 --- a/dotnet/samples/02-agents/AgentWithOpenAI/README.md +++ b/dotnet/samples/02-agents/AgentWithOpenAI/README.md @@ -14,4 +14,5 @@ Agent Framework provides additional support to allow OpenAI developers to use th |[Using Reasoning Capabilities](./Agent_OpenAI_Step02_Reasoning/)|This sample demonstrates how to create an AI agent with reasoning capabilities using OpenAI's reasoning models and response types.| |[Creating an Agent from a ChatClient](./Agent_OpenAI_Step03_CreateFromChatClient/)|This sample demonstrates how to create an AI agent directly from an OpenAI.Chat.ChatClient instance using OpenAIChatClientAgent.| |[Creating an Agent from an OpenAIResponseClient](./Agent_OpenAI_Step04_CreateFromOpenAIResponseClient/)|This sample demonstrates how to create an AI agent directly from an OpenAI.Responses.OpenAIResponseClient instance using OpenAIResponseClientAgent.| -|[Managing Conversation State](./Agent_OpenAI_Step05_Conversation/)|This sample demonstrates how to maintain conversation state across multiple turns using the AgentSession for context continuity.| \ No newline at end of file +|[Managing Conversation State](./Agent_OpenAI_Step05_Conversation/)|This 
sample demonstrates how to maintain conversation state across multiple turns using the AgentSession for context continuity.| +|[Code Interpreter File Download](./Agent_OpenAI_Step06_CodeInterpreterFileDownload/)|This sample demonstrates how to download files generated by Code Interpreter using the Containers API (`cfile_`/`cntr_` IDs).| \ No newline at end of file diff --git a/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Agent_Step24_CodeInterpreterFileDownload.csproj b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Agent_Step24_CodeInterpreterFileDownload.csproj new file mode 100644 index 0000000000..129c9026a2 --- /dev/null +++ b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Agent_Step24_CodeInterpreterFileDownload.csproj @@ -0,0 +1,19 @@ + + + + Exe + net10.0 + + enable + enable + + + + + + + + + + + diff --git a/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Program.cs b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Program.cs new file mode 100644 index 0000000000..79fac0d5d4 --- /dev/null +++ b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/Program.cs @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft. All rights reserved. + +// This sample shows how to download files generated by Code Interpreter using Microsoft Foundry. +// Code Interpreter generates files inside containers (cfile_ / cntr_ IDs) which cannot be +// downloaded via the standard Files API. Use ContainerClient from the project's OpenAI client instead. + +#pragma warning disable OPENAI001 + +using Azure.AI.Projects; +using Azure.Identity; +using Microsoft.Agents.AI; +using Microsoft.Extensions.AI; +using OpenAI.Responses; + +string endpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT") ?? 
throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set."); +string deploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME") ?? "gpt-4o-mini"; + +// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production. +// In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid +// latency issues, unintended credential probing, and potential security risks from fallback mechanisms. +AIProjectClient aiProjectClient = new(new Uri(endpoint), new DefaultAzureCredential()); + +// Create an agent with Code Interpreter tool enabled +AIAgent agent = aiProjectClient.AsAIAgent( + deploymentName, + instructions: "You are a helpful assistant that can generate files using code.", + name: "CodeInterpreterAgent", + tools: [new HostedCodeInterpreterTool()]); + +// Ask the agent to generate a file +AgentResponse response = await agent.RunAsync( + "Create a CSV file with the multiplication times tables from 1 to 12. Include headers."); + +// Display the text response +foreach (TextContent textContent in response.Messages.SelectMany(x => x.Contents).OfType()) +{ + Console.WriteLine(textContent.Text); +} + +// Extract container file citations from response annotations and download. +// AIProjectClient.GetProjectOpenAIClient() returns a ProjectOpenAIClient (inherits from OpenAI.OpenAIClient) +// which supports GetContainerClient(), unlike AzureOpenAIClient which does not. 
+var containerClient = aiProjectClient.GetProjectOpenAIClient().GetContainerClient(); + +HashSet downloadedFiles = []; +bool foundContainerFiles = false; + +foreach (AIContent content in response.Messages.SelectMany(x => x.Contents)) +{ + if (content.Annotations is null) + { + continue; + } + + foreach (AIAnnotation annotation in content.Annotations) + { + // Container files from Code Interpreter have ContainerFileCitationMessageAnnotation as raw representation + if (annotation is CitationAnnotation citation + && citation.RawRepresentation is ContainerFileCitationMessageAnnotation containerCitation) + { + foundContainerFiles = true; + + // Deduplicate by container+file ID in case the same file is cited multiple times + string key = $"{containerCitation.ContainerId}/{containerCitation.FileId}"; + if (!downloadedFiles.Add(key)) + { + continue; + } + + Console.WriteLine($"\nDownloading container file: {containerCitation.Filename}"); + Console.WriteLine($" Container ID: {containerCitation.ContainerId}"); + Console.WriteLine($" File ID: {containerCitation.FileId}"); + + BinaryData fileData = await containerClient.DownloadContainerFileAsync( + containerCitation.ContainerId, + containerCitation.FileId); + + // Sanitize filename to prevent path traversal + string safeFilename = Path.GetFileName(containerCitation.Filename); + string outputPath = Path.Combine(Directory.GetCurrentDirectory(), safeFilename); + await File.WriteAllBytesAsync(outputPath, fileData.ToArray()); + Console.WriteLine($" Saved to: {outputPath}"); + } + } +} + +if (!foundContainerFiles) +{ + Console.WriteLine("\nNo container file citations found in the response."); + Console.WriteLine("The model may not have generated a downloadable file for this prompt."); +} diff --git a/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/README.md b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/README.md new file mode 100644 index 
0000000000..4d50b98ca8 --- /dev/null +++ b/dotnet/samples/02-agents/AgentsWithFoundry/Agent_Step24_CodeInterpreterFileDownload/README.md @@ -0,0 +1,56 @@ +# Code Interpreter File Download (Microsoft Foundry) + +This sample demonstrates how to download files generated by Code Interpreter when using Microsoft Foundry. + +## What this sample demonstrates + +- Creating an agent with Code Interpreter tool using `AIProjectClient.AsAIAgent()` +- Generating files through Code Interpreter (e.g., CSV, Excel, images) +- Extracting container file citations from agent response annotations +- Downloading container files using the `ContainerClient` via `AIProjectClient.GetProjectOpenAIClient()` + +## Container files vs regular files + +When Code Interpreter generates a file, the file is stored inside a **container** with a `cntr_` prefixed ID. The file itself gets a `cfile_` prefixed ID. + +These container files **cannot** be downloaded using the standard Files API (`GetOpenAIFileClient`), which returns 404 for `cfile_` IDs. Instead, you must use the **Containers API** to download them. + +### Getting the ContainerClient with Foundry + +`AzureOpenAIClient.GetContainerClient()` is not supported and throws `InvalidOperationException`. Instead, use the project's OpenAI client which inherits directly from `OpenAI.OpenAIClient`: + +```csharp +// ❌ AzureOpenAIClient does not support ContainerClient +var azureClient = new AzureOpenAIClient(endpoint, credential); +azureClient.GetContainerClient(); // Throws InvalidOperationException + +// ✅ Use AIProjectClient's project OpenAI client +var containerClient = aiProjectClient.GetProjectOpenAIClient().GetContainerClient(); +await containerClient.DownloadContainerFileAsync("cntr_...", "cfile_..."); +``` + +The container ID and file ID are available from the `ContainerFileCitationMessageAnnotation` annotation in the response, accessible via `CitationAnnotation.RawRepresentation`. 
+ +## Prerequisites + +- .NET 10 SDK or later +- Microsoft Foundry service endpoint and deployment configured +- Azure CLI installed and authenticated (`az login`) + +Set the following environment variables: + +```powershell +$env:AZURE_AI_PROJECT_ENDPOINT="https://your-foundry-service.services.ai.azure.com/api/projects/your-foundry-project" +$env:AZURE_AI_MODEL_DEPLOYMENT_NAME="gpt-4o-mini" # Optional, defaults to gpt-4o-mini +``` + +## Run the sample + +```powershell +dotnet run +``` + +## See also + +- [Code Interpreter File Download with OpenAI](../../../02-agents/AgentWithOpenAI/Agent_OpenAI_Step06_CodeInterpreterFileDownload/) — same scenario using Public OpenAI +- [Code Interpreter](../Agent_Step14_CodeInterpreter/) — Code Interpreter without file download diff --git a/dotnet/samples/02-agents/AgentsWithFoundry/README.md b/dotnet/samples/02-agents/AgentsWithFoundry/README.md index c65cb24acd..74160ec2ec 100644 --- a/dotnet/samples/02-agents/AgentsWithFoundry/README.md +++ b/dotnet/samples/02-agents/AgentsWithFoundry/README.md @@ -72,6 +72,7 @@ Some samples require extra tool-specific environment variables. 
See each sample | [Web search](./Agent_Step21_WebSearch/) | Web search tool | | [Memory search](./Agent_Step22_MemorySearch/) | Memory search tool | | [Local MCP](./Agent_Step23_LocalMCP/) | Local MCP client with HTTP transport | +| [Code interpreter file download](./Agent_Step24_CodeInterpreterFileDownload/) | Download container files generated by code interpreter | ## Running the samples diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.aspire/settings.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.aspire/settings.json new file mode 100644 index 0000000000..842d8f7ce6 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.aspire/settings.json @@ -0,0 +1,3 @@ +{ + "appHostPath": "../DevUIIntegration.AppHost/DevUIIntegration.AppHost.csproj" +} \ No newline at end of file diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.gitignore b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.gitignore new file mode 100644 index 0000000000..bdc7d02918 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/.gitignore @@ -0,0 +1 @@ +**/**/*.Development.json diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/DevUIIntegration.AppHost.csproj b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/DevUIIntegration.AppHost.csproj new file mode 100644 index 0000000000..35c8eb709d --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/DevUIIntegration.AppHost.csproj @@ -0,0 +1,29 @@ + + + + + + Exe + net10.0 + enable + enable + + + + + + + + + + + + + + + + + + + + diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Program.cs b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Program.cs new file mode 100644 index 0000000000..562e61521b --- /dev/null +++ 
b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Program.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft. All rights reserved. + +var builder = DistributedApplication.CreateBuilder(args); + +var foundry = builder.AddAzureAIFoundry("foundry"); + +// Comment out the following lines to provision a new Foundry instance instead of connecting to an existing one. When provisioning, the agent services wait for Foundry to be ready and the DevUI resource waits for the agent services, so the DevUI frontend only starts once its backends are usable. +var existingFoundryName = builder.AddParameter("existingFoundryName") + .WithDescription("The name of the existing Azure Foundry resource."); +var existingFoundryResourceGroup = builder.AddParameter("existingFoundryResourceGroup") + .WithDescription("The resource group of the existing Azure Foundry resource."); +foundry.AsExisting(existingFoundryName, existingFoundryResourceGroup); + +// Add the writer agent service +var writerAgent = builder.AddProject("writer-agent") + .WithHttpHealthCheck("/health") + .WithReference(foundry).WaitFor(foundry); + +// Add the editor agent service +var editorAgent = builder.AddProject("editor-agent") + .WithHttpHealthCheck("/health") + .WithReference(foundry).WaitFor(foundry); + +// Add DevUI integration that aggregates agents from all agent services. +// Agent metadata is declared here so backends don't need a /v1/entities endpoint.
+_ = builder.AddDevUI("devui") + .WithAgentService(writerAgent, agents: [new("writer")]) // the name of the agent should match the agent declaration in WriterAgent/Program.cs + .WithAgentService(editorAgent, agents: [new("editor")]) // the name of the agent should match the agent declaration in EditorAgent/Program.cs + .WaitFor(writerAgent) + .WaitFor(editorAgent); + +builder.Build().Run(); diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Properties/launchSettings.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Properties/launchSettings.json new file mode 100644 index 0000000000..1012f97aa1 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/Properties/launchSettings.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json.schemastore.org/launchsettings.json", + "profiles": { + "https": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "applicationUrl": "https://localhost:16500;http://localhost:16501", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development", + "DOTNET_ENVIRONMENT": "Development", + "ASPIRE_DASHBOARD_OTLP_ENDPOINT_URL": "https://localhost:17250", + "ASPIRE_DASHBOARD_MCP_ENDPOINT_URL": "https://localhost:18100", + "ASPIRE_RESOURCE_SERVICE_ENDPOINT_URL": "https://localhost:17250", + "ASPIRE_SHOW_DASHBOARD_RESOURCES": "true" + } + }, + "http": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "applicationUrl": "http://localhost:16501", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development", + "DOTNET_ENVIRONMENT": "Development", + "ASPIRE_DASHBOARD_OTLP_ENDPOINT_URL": "http://localhost:17251", + "ASPIRE_DASHBOARD_MCP_ENDPOINT_URL": "http://localhost:18101", + "ASPIRE_RESOURCE_SERVICE_ENDPOINT_URL": "http://localhost:17251", + "ASPIRE_SHOW_DASHBOARD_RESOURCES": "true", + "ASPIRE_ALLOW_UNSECURED_TRANSPORT": "true" + } + } + } +} diff --git 
a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/appsettings.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/appsettings.json new file mode 100644 index 0000000000..bfe8cb0cde --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.AppHost/appsettings.json @@ -0,0 +1,14 @@ +{ + "Azure": { + "TenantId": "", + "SubscriptionId": "", + "AllowResourceGroupCreation": true, + "ResourceGroup": "", + "Location": "", + "CredentialSource": "AzureCli" + }, + "Parameters": { + "existingFoundryName": "", + "existingFoundryResourceGroup": "" + } +} diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/DevUIIntegration.ServiceDefaults.csproj b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/DevUIIntegration.ServiceDefaults.csproj new file mode 100644 index 0000000000..0c5573beac --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/DevUIIntegration.ServiceDefaults.csproj @@ -0,0 +1,22 @@ + + + + net10.0 + enable + enable + true + + + + + + + + + + + + + + + diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/Extensions.cs b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/Extensions.cs new file mode 100644 index 0000000000..504bc71621 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/DevUIIntegration.ServiceDefaults/Extensions.cs @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Diagnostics.HealthChecks; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Diagnostics.HealthChecks; +using Microsoft.Extensions.Logging; +using OpenTelemetry; +using OpenTelemetry.Metrics; +using OpenTelemetry.Trace; + +namespace Microsoft.Extensions.Hosting; + +// Adds common Aspire services: service discovery, resilience, health checks, and OpenTelemetry. +// This project should be referenced by each service project in your solution. +// To learn more about using this project, see https://aka.ms/dotnet/aspire/service-defaults +#pragma warning disable CA1724 // Type name 'Extensions' conflicts with namespace - acceptable for Aspire pattern +public static class Extensions +#pragma warning restore CA1724 +{ + private const string HealthEndpointPath = "/health"; + private const string AlivenessEndpointPath = "/alive"; + + public static TBuilder AddServiceDefaults(this TBuilder builder) where TBuilder : IHostApplicationBuilder + { + builder.ConfigureOpenTelemetry(); + + builder.AddDefaultHealthChecks(); + + builder.Services.AddServiceDiscovery(); + + builder.Services.ConfigureHttpClientDefaults(http => + { + // Turn on resilience by default + http.AddStandardResilienceHandler(); + + // Turn on service discovery by default + http.AddServiceDiscovery(); + }); + + // Uncomment the following to restrict the allowed schemes for service discovery. 
+ // builder.Services.Configure(options => + // { + // options.AllowedSchemes = ["https"]; + // }); + + return builder; + } + + public static TBuilder ConfigureOpenTelemetry(this TBuilder builder) where TBuilder : IHostApplicationBuilder + { + builder.Logging.AddOpenTelemetry(logging => + { + logging.IncludeFormattedMessage = true; + logging.IncludeScopes = true; + }); + + builder.Services.AddOpenTelemetry() + .WithMetrics(metrics => + { + metrics.AddAspNetCoreInstrumentation() + .AddHttpClientInstrumentation() + .AddRuntimeInstrumentation(); + }) + .WithTracing(tracing => + { + tracing.AddSource(builder.Environment.ApplicationName) + .AddAspNetCoreInstrumentation(tracing => + // Exclude health check requests from tracing + tracing.Filter = context => + !context.Request.Path.StartsWithSegments(HealthEndpointPath) + && !context.Request.Path.StartsWithSegments(AlivenessEndpointPath) + ) + // Uncomment the following line to enable gRPC instrumentation (requires the OpenTelemetry.Instrumentation.GrpcNetClient package) + //.AddGrpcClientInstrumentation() + .AddHttpClientInstrumentation(); + }); + + builder.AddOpenTelemetryExporters(); + + return builder; + } + + private static TBuilder AddOpenTelemetryExporters(this TBuilder builder) where TBuilder : IHostApplicationBuilder + { + var useOtlpExporter = !string.IsNullOrWhiteSpace(builder.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"]); + + if (useOtlpExporter) + { + builder.Services.AddOpenTelemetry().UseOtlpExporter(); + } + + // Uncomment the following lines to enable the Azure Monitor exporter (requires the Azure.Monitor.OpenTelemetry.AspNetCore package) + //if (!string.IsNullOrEmpty(builder.Configuration["APPLICATIONINSIGHTS_CONNECTION_STRING"])) + //{ + // builder.Services.AddOpenTelemetry() + // .UseAzureMonitor(); + //} + + return builder; + } + + public static TBuilder AddDefaultHealthChecks(this TBuilder builder) where TBuilder : IHostApplicationBuilder + { + builder.Services.AddHealthChecks() + // Add a default 
liveness check to ensure app is responsive + .AddCheck("self", () => HealthCheckResult.Healthy(), ["live"]); + + return builder; + } + + public static WebApplication MapDefaultEndpoints(this WebApplication app) + { + // Adding health checks endpoints to applications in non-development environments has security implications. + // See https://aka.ms/dotnet/aspire/healthchecks for details before enabling these endpoints in non-development environments. + if (app.Environment.IsDevelopment()) + { + // All health checks must pass for app to be considered ready to accept traffic after starting + app.MapHealthChecks(HealthEndpointPath); + + // Only health checks tagged with the "live" tag must pass for app to be considered alive + app.MapHealthChecks(AlivenessEndpointPath, new HealthCheckOptions + { + Predicate = r => r.Tags.Contains("live") + }); + } + + return app; + } +} diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/EditorAgent.csproj b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/EditorAgent.csproj new file mode 100644 index 0000000000..865af164b0 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/EditorAgent.csproj @@ -0,0 +1,20 @@ + + + + net10.0 + enable + enable + b2c3d4e5-f6a7-8901-bcde-f12345678901 + + + + + + + + + + + + + diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Program.cs b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Program.cs new file mode 100644 index 0000000000..d50213a9f7 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Program.cs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.ComponentModel; +using Azure.Identity; +using Microsoft.Agents.AI; +using Microsoft.Agents.AI.Hosting; +using Microsoft.Extensions.AI; + +var builder = WebApplication.CreateBuilder(args); + +builder.AddServiceDefaults(); + +builder.AddAzureChatCompletionsClient(connectionName: "foundry", + configureSettings: settings => + { + settings.TokenCredential = new DefaultAzureCredential(); + settings.EnableSensitiveTelemetryData = builder.Environment.IsDevelopment(); + }) + .AddChatClient("gpt41"); + +builder.AddAIAgent("editor", (sp, key) => +{ + var chatClient = sp.GetRequiredService(); + return new ChatClientAgent( + chatClient, + name: key, + instructions: "You edit short stories to improve grammar and style, ensuring the stories are less than 300 words. Once finished editing, you select a title and format the story for publishing.", + tools: [AIFunctionFactory.Create(FormatStory)] + ); +}); + +// Register services for OpenAI responses and conversations +builder.Services.AddOpenAIResponses(); +builder.Services.AddOpenAIConversations(); + +var app = builder.Build(); + +// Map OpenAI API endpoints — DevUI aggregator routes requests here +app.MapOpenAIResponses(); +app.MapOpenAIConversations(); + +app.MapDefaultEndpoints(); + +app.Run(); + +[Description("Formats the story for publication, revealing its title.")] +static string FormatStory(string title, string story) => $""" + **Title**: {title} + + {story} + """; diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Properties/launchSettings.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Properties/launchSettings.json new file mode 100644 index 0000000000..3ad5a6f098 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/EditorAgent/Properties/launchSettings.json @@ -0,0 +1,14 @@ +{ + "$schema": "http://json.schemastore.org/launchsettings.json", + "profiles": { + "http": { + "commandName": "Project", + "dotnetRunMessages": true, + 
"launchBrowser": true, + "applicationUrl": "http://localhost:5281", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + } + } +} diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/README.md b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/README.md new file mode 100644 index 0000000000..22f135eaa3 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/README.md @@ -0,0 +1,99 @@ +# DevUI Integration Sample + +This sample demonstrates how to use the **Aspire.Hosting.AgentFramework.DevUI** library to test and debug multiple AI agents through a unified DevUI web interface, orchestrated by an Aspire AppHost. + +The solution contains two agent services: + +- **WriterAgent** — a simple agent that writes short stories (≤ 300 words) about a given topic. +- **EditorAgent** — an agent that edits stories for grammar and style, selects a title, and formats the result for publishing. It also demonstrates tool use via `AIFunctionFactory`. + +## Prerequisites + +- [.NET 10 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/10.0) +- [Aspire CLI](https://learn.microsoft.com/dotnet/aspire/fundamentals/setup-tooling) +- An Azure subscription with access to [Azure AI Foundry](https://learn.microsoft.com/azure/ai-studio/) +- Azure CLI authenticated (`az login`) + +## Azure AI Foundry configuration + +The sample requires an Azure AI Foundry resource with a deployed `gpt-4.1` model. 
You have two options: + +### Option 1: Connect to an existing Foundry resource + +Fill in the parameters in `DevUIIntegration.AppHost/appsettings.json`: + +```json +{ + "Azure": { + "TenantId": "", + "SubscriptionId": "", + "AllowResourceGroupCreation": true, + "ResourceGroup": "", + "Location": "", + "CredentialSource": "AzureCli" + }, + "Parameters": { + "existingFoundryName": "", + "existingFoundryResourceGroup": "" + } +} +``` + +The AppHost calls `foundry.AsExisting(...)` with these parameters, so Aspire connects to the existing resource instead of provisioning a new one. + +### Option 2: Let Aspire provision a new Foundry resource + +Remove or comment out the `AsExisting` block in `DevUIIntegration.AppHost/Program.cs`: + +```csharp +// Comment out the following lines to create a new Foundry instance +// _ = builder.AddParameterFromConfiguration("tenant", "Azure:TenantId"); +// var existingFoundryName = builder.AddParameter("existingFoundryName") ... +// foundry.AsExisting(existingFoundryName, existingFoundryResourceGroup); +``` + +Aspire will provision a new Azure AI Foundry resource on startup. The DevUI resource uses `.WaitFor(foundry)` transitively through the agent services, so the frontend won't become available until provisioning completes. This can take several minutes on first run. + +You still need to fill in the `Azure` section of `appsettings.json` (subscription, location, etc.) so Aspire knows where to create the resource. + +## Agent name matching with `WithAgentService` + +When connecting agent services to DevUI in the AppHost, you must pass the correct agent name via the `agents:` parameter. 
**This name must match the name used in `AddAIAgent(...)` inside each agent service's `Program.cs` — not the Aspire resource name.** + +For example, the WriterAgent Aspire resource is named `"writer-agent"`, but the agent is registered as `"writer"`: + +```csharp +// WriterAgent/Program.cs +builder.AddAIAgent("writer", "You write short stories ..."); +// ^^^^^^^^ this is the agent name +``` + +```csharp +// EditorAgent/Program.cs +builder.AddAIAgent("editor", (sp, key) => { ... }); +// ^^^^^^^^ this is the agent name +``` + +The AppHost must use these exact names: + +```csharp +// DevUIIntegration.AppHost/Program.cs +builder.AddDevUI("devui") + .WithAgentService(writerAgent, agents: [new("writer")]) // ✅ matches AddAIAgent("writer", ...) + .WithAgentService(editorAgent, agents: [new("editor")]) // ✅ matches AddAIAgent("editor", ...) + .WaitFor(writerAgent) + .WaitFor(editorAgent); +``` + +Using the wrong name (e.g., `new("writer-agent")` instead of `new("writer")`) will cause the aggregator to send an entity ID the backend doesn't recognize, resulting in 404 errors when interacting with the agent. + +If you omit the `agents:` parameter entirely, the aggregator defaults to a single agent named after the Aspire resource (e.g., `"writer-agent"`). Because agent services don't expose a `/v1/entities` discovery endpoint, the aggregator cannot look up the real agent name, so **in that case the Aspire resource name itself must exactly match the agent name registered via `AddAIAgent(...)` in the service's `Program.cs`**. + +## Running the sample + +```bash +cd dotnet/samples/05-end-to-end/DevUIAspireIntegration +aspire run +``` + +Once all services are running, open the **DevUI** URL shown in the Aspire dashboard. You should see both the writer and editor agents listed — select one and start a conversation. 
diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Program.cs b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Program.cs new file mode 100644 index 0000000000..72f3215453 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Program.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Azure.Identity; +using Microsoft.Agents.AI.Hosting; + +var builder = WebApplication.CreateBuilder(args); + +builder.AddServiceDefaults(); + +builder.AddAzureChatCompletionsClient(connectionName: "foundry", + configureSettings: settings => + { + settings.TokenCredential = new DefaultAzureCredential(); + settings.EnableSensitiveTelemetryData = builder.Environment.IsDevelopment(); + }) + .AddChatClient("gpt41"); + +builder.AddAIAgent("writer", "You write short stories (300 words or less) about the specified topic."); + +// Register services for OpenAI responses and conversations +builder.Services.AddOpenAIResponses(); +builder.Services.AddOpenAIConversations(); + +var app = builder.Build(); + +// Map OpenAI API endpoints — DevUI aggregator routes requests here +app.MapOpenAIResponses(); +app.MapOpenAIConversations(); + +app.MapDefaultEndpoints(); + +app.Run(); diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Properties/launchSettings.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Properties/launchSettings.json new file mode 100644 index 0000000000..5220475800 --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/Properties/launchSettings.json @@ -0,0 +1,14 @@ +{ + "$schema": "http://json.schemastore.org/launchsettings.json", + "profiles": { + "http": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "applicationUrl": "http://localhost:5280", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + } + } +} diff --git 
a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/WriterAgent.csproj b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/WriterAgent.csproj new file mode 100644 index 0000000000..ef457ff1fb --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/WriterAgent/WriterAgent.csproj @@ -0,0 +1,20 @@ + + + + net10.0 + enable + enable + a1b2c3d4-e5f6-7890-abcd-ef1234567890 + + + + + + + + + + + + + diff --git a/dotnet/samples/05-end-to-end/DevUIAspireIntegration/aspire.config.json b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/aspire.config.json new file mode 100644 index 0000000000..d9ca439f8b --- /dev/null +++ b/dotnet/samples/05-end-to-end/DevUIAspireIntegration/aspire.config.json @@ -0,0 +1,5 @@ +{ + "appHost": { + "path": "DevUIIntegration.AppHost/DevUIIntegration.AppHost.csproj" + } +} \ No newline at end of file diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentEntityInfo.cs b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentEntityInfo.cs new file mode 100644 index 0000000000..c308963fc1 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentEntityInfo.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Aspire.Hosting.AgentFramework; + +/// +/// Describes an AI agent exposed by an agent service backend, used for entity discovery in DevUI. +/// +/// +/// +/// When added via , +/// agent metadata is declared at the AppHost level so that the DevUI aggregator can build the +/// entity listing without querying each backend's /v1/entities endpoint. +/// +/// +/// Agent services only need to expose the standard OpenAI Responses and Conversations API endpoints +/// (MapOpenAIResponses and MapOpenAIConversations), not a custom discovery endpoint. +/// +/// +/// The unique identifier for the agent, typically matching the name passed to AddAIAgent. +/// A short description of the agent's capabilities. +public record AgentEntityInfo(string Id, string? 
Description = null) +{ + /// + /// Gets the display name for the agent. Defaults to if not specified. + /// + public string Name { get; init; } = Id; + + /// + /// Gets the entity type. Defaults to "agent". + /// + public string Type { get; init; } = "agent"; + + /// + /// Gets the framework identifier. Defaults to "agent_framework". + /// + public string Framework { get; init; } = "agent_framework"; +} diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentFrameworkBuilderExtensions.cs b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentFrameworkBuilderExtensions.cs new file mode 100644 index 0000000000..7e7d5b16c0 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentFrameworkBuilderExtensions.cs @@ -0,0 +1,185 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using Aspire.Hosting.AgentFramework; +using Aspire.Hosting.ApplicationModel; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; + +namespace Aspire.Hosting; + +/// +/// Provides extension methods for adding Agent Framework DevUI resources to the application model. +/// +public static class AgentFrameworkBuilderExtensions +{ + /// + /// Adds a DevUI resource for testing AI agents in a distributed application. + /// + /// + /// + /// DevUI is a web-based interface for testing and debugging AI agents using the OpenAI Responses protocol. + /// When configured with , it aggregates agents from multiple backend services + /// and provides a unified testing interface. + /// + /// + /// The aggregator runs as an in-process reverse proxy within the AppHost, requiring no external container image. + /// It serves the DevUI frontend from embedded resources in Microsoft.Agents.AI.DevUI when available, and + /// falls back to proxying from the first configured backend. It aggregates entity listings from all backends. 
+ /// + /// + /// This resource is excluded from the deployment manifest as it is intended for development use only. + /// + /// + /// The . + /// The name to give the resource. + /// The host port for the DevUI web interface. If not specified, a random port will be assigned. + /// A reference to the for chaining. + /// + /// + /// var devui = builder.AddDevUI("devui") + /// .WithAgentService(dotnetAgent) + /// .WithAgentService(pythonAgent); + /// + /// + public static IResourceBuilder AddDevUI( + this IDistributedApplicationBuilder builder, + string name, + int? port = null) + { + ArgumentNullException.ThrowIfNull(builder); + ArgumentNullException.ThrowIfNull(name); + + var resource = new DevUIResource(name, port); + + var resourceBuilder = builder.AddResource(resource) + .ExcludeFromManifest(); // DevUI is a dev-only tool + + // Initialize the in-process aggregator when the resource is initialized by the orchestrator + builder.Eventing.Subscribe(resource, async (e, ct) => + { + var logger = e.Logger; + var aggregator = new DevUIAggregatorHostedService(resource, e.Services.GetRequiredService().CreateLogger()); + + try + { + // Wait for dependencies (e.g. agent service backends) before starting. + // Custom resources must manually publish BeforeResourceStartedEvent to trigger + // the orchestrator's WaitFor mechanism. 
+ await e.Eventing.PublishAsync(new BeforeResourceStartedEvent(resource, e.Services), ct).ConfigureAwait(false); + + await e.Notifications.PublishUpdateAsync(resource, snapshot => snapshot with + { + State = KnownResourceStates.Starting + }).ConfigureAwait(false); + + await aggregator.StartAsync(ct).ConfigureAwait(false); + + // Allocate the endpoint so the URL appears in the Aspire dashboard + var endpointAnnotation = resource.Annotations + .OfType() + .First(ea => ea.Name == DevUIResource.PrimaryEndpointName); + + endpointAnnotation.AllocatedEndpoint = new AllocatedEndpoint( + endpointAnnotation, "localhost", aggregator.AllocatedPort); + + var devuiUrl = $"http://localhost:{aggregator.AllocatedPort}/devui/"; + + await e.Notifications.PublishUpdateAsync(resource, snapshot => snapshot with + { + State = KnownResourceStates.Running, + Urls = [new UrlSnapshot("DevUI", devuiUrl, IsInternal: false)] + }).ConfigureAwait(false); + + // Shut down the aggregator when the app stops + var lifetime = e.Services.GetRequiredService(); + lifetime.ApplicationStopping.Register(() => + { + e.Notifications.PublishUpdateAsync(resource, snapshot => snapshot with + { + State = KnownResourceStates.Finished + }).GetAwaiter().GetResult(); + + aggregator.StopAsync(CancellationToken.None).GetAwaiter().GetResult(); + aggregator.DisposeAsync().AsTask().GetAwaiter().GetResult(); + }); + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to start DevUI aggregator"); + + await aggregator.DisposeAsync().ConfigureAwait(false); + + await e.Notifications.PublishUpdateAsync(resource, snapshot => snapshot with + { + State = KnownResourceStates.FailedToStart + }).ConfigureAwait(false); + } + }); + + return resourceBuilder; + } + + /// + /// Configures DevUI to connect to an agent service backend. + /// + /// + /// + /// Each agent service should expose the OpenAI Responses and Conversations API endpoints + /// (via MapOpenAIResponses and MapOpenAIConversations). 
+ /// + /// + /// When is provided, the aggregator builds the entity listing from + /// these declarations without querying the backend. When not provided, a single agent named + /// after the service resource is assumed. Agent services don't need a /v1/entities endpoint. + /// + /// + /// The type of the agent service resource. + /// The DevUI resource builder. + /// The agent service resource to connect to. + /// + /// Optional list of agents declared by this backend. When provided, the aggregator uses these + /// declarations directly. When not provided, defaults to a single agent named after the + /// resource. The backend doesn't need to expose a + /// /v1/entities endpoint in either case. + /// + /// + /// An optional prefix to add to entity IDs from this backend. + /// If not specified, the resource name will be used as the prefix. + /// + /// A reference to the for chaining. + /// + /// + /// var writerAgent = builder.AddProject<Projects.WriterAgent>("writer-agent"); + /// var editorAgent = builder.AddProject<Projects.EditorAgent>("editor-agent"); + /// + /// builder.AddDevUI("devui") + /// .WithAgentService(writerAgent, agents: [new("writer", "Writes short stories")]) + /// .WithAgentService(editorAgent, agents: [new("editor", "Edits and formats stories")]) + /// .WaitFor(writerAgent) + /// .WaitFor(editorAgent); + /// + /// + public static IResourceBuilder WithAgentService( + this IResourceBuilder builder, + IResourceBuilder agentService, + IReadOnlyList? agents = null, + string? 
entityIdPrefix = null) + where TSource : IResourceWithEndpoints + { + ArgumentNullException.ThrowIfNull(builder); + ArgumentNullException.ThrowIfNull(agentService); + + // Default to a single agent named after the service resource + agents ??= [new AgentEntityInfo(agentService.Resource.Name)]; + + builder.WithAnnotation(new AgentServiceAnnotation(agentService.Resource, entityIdPrefix, agents)); + builder.WithRelationship(agentService.Resource, "agent-backend"); + + return builder; + } +} diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentServiceAnnotation.cs b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentServiceAnnotation.cs new file mode 100644 index 0000000000..15b3f7dd90 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/AgentServiceAnnotation.cs @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Aspire.Hosting.AgentFramework; + +namespace Aspire.Hosting.ApplicationModel; + +/// +/// An annotation that tracks an agent service backend referenced by a DevUI resource. +/// +/// +/// This annotation is used to configure DevUI to aggregate entities from multiple +/// agent service backends. Each annotation represents one backend that DevUI should +/// connect to for entity discovery and request routing. +/// +public class AgentServiceAnnotation : IResourceAnnotation +{ + /// + /// Initializes a new instance of the class. + /// + /// The agent service resource. + /// + /// An optional prefix to add to entity IDs from this backend to avoid conflicts. + /// If not specified, the resource name will be used as the prefix. + /// + /// + /// Optional list of agents declared by this backend. When provided, the aggregator builds the entity + /// listing directly from these declarations instead of querying the backend's /v1/entities endpoint. + /// + public AgentServiceAnnotation(IResource agentService, string? entityIdPrefix = null, IReadOnlyList? 
agents = null) + { + ArgumentNullException.ThrowIfNull(agentService); + + this.AgentService = agentService; + this.EntityIdPrefix = entityIdPrefix; + this.Agents = agents ?? []; + } + + /// + /// Gets the agent service resource that exposes AI agents. + /// + public IResource AgentService { get; } + + /// + /// Gets the prefix to use for entity IDs from this backend. + /// + /// + /// When null, the resource name will be used as the prefix. + /// Entity IDs will be formatted as "{prefix}/{entityId}" to ensure uniqueness + /// across multiple agent backends. + /// + public string? EntityIdPrefix { get; } + + /// + /// Gets the list of agents declared by this backend. + /// + /// + /// When non-empty, the DevUI aggregator uses these declarations to build the entity listing + /// without querying the backend. When empty, the aggregator falls back to calling + /// GET /v1/entities on the backend for discovery. + /// + public IReadOnlyList Agents { get; } +} diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/Aspire.Hosting.AgentFramework.DevUI.csproj b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/Aspire.Hosting.AgentFramework.DevUI.csproj new file mode 100644 index 0000000000..0f45c95147 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/Aspire.Hosting.AgentFramework.DevUI.csproj @@ -0,0 +1,25 @@ + + + + $(TargetFrameworksCore) + true + aspire integration hosting agent-framework devui ai agents + Microsoft Agent Framework DevUI support for Aspire. + + + $(NoWarn);CA1873;RCS1061;VSTHRD002;IL2026;IL3050 + + + + + + + + + + + + + + + diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIAggregatorHostedService.cs b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIAggregatorHostedService.cs new file mode 100644 index 0000000000..d65efaca07 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIAggregatorHostedService.cs @@ -0,0 +1,779 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net.Http; +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Aspire.Hosting.ApplicationModel; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Hosting.Server; +using Microsoft.AspNetCore.Hosting.Server.Features; +using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.StaticFiles; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; + +namespace Aspire.Hosting.AgentFramework; + +/// +/// Hosts an in-process reverse proxy that aggregates DevUI entities from multiple agent backends. +/// Serves the DevUI frontend directly from the Microsoft.Agents.AI.DevUI assembly's embedded +/// resources and intercepts API calls to provide multi-backend entity aggregation and request routing. +/// +internal sealed class DevUIAggregatorHostedService : IAsyncDisposable +{ + private static readonly FileExtensionContentTypeProvider s_contentTypeProvider = new(); + + private WebApplication? _app; + private readonly DevUIResource _resource; + private readonly ILogger _logger; + + // Frontend resources loaded from the Microsoft.Agents.AI.DevUI assembly (null if unavailable) + private readonly Dictionary? _frontendResources; + + // Maps conversation IDs to backend URLs for routing GET requests that lack agent_id context. + // Populated when the aggregator routes conversation requests to a positively-resolved backend. + private readonly ConcurrentDictionary _conversationBackendMap = new(StringComparer.OrdinalIgnoreCase); + + public DevUIAggregatorHostedService( + DevUIResource resource, + ILogger logger) + { + this._resource = resource; + this._logger = logger; + this._frontendResources = LoadFrontendResources(logger); + } + + /// + /// Gets the port the aggregator is listening on, available after . 
+ /// + internal int AllocatedPort { get; private set; } + + public async Task StartAsync(CancellationToken cancellationToken) + { + var builder = WebApplication.CreateSlimBuilder(); + builder.Logging.ClearProviders(); + + builder.Services.AddHttpClient("devui-proxy") + .ConfigurePrimaryHttpMessageHandler(() => new HttpClientHandler + { + AllowAutoRedirect = false + }); + + this._app = builder.Build(); + + // Bind to a fixed port if one was specified on the DevUI resource; otherwise use 0 for dynamic allocation. + var port = this._resource.Port ?? 0; + this._app.Urls.Add($"http://127.0.0.1:{port}"); + this.MapRoutes(this._app); + + await this._app.StartAsync(cancellationToken).ConfigureAwait(false); + + var serverAddresses = this._app.Services.GetRequiredService() + .Features.Get(); + + if (serverAddresses is not null) + { + var address = serverAddresses.Addresses.First(); + var uri = new Uri(address); + this.AllocatedPort = uri.Port; + this._logger.LogInformation("DevUI aggregator started on port {Port}", this.AllocatedPort); + } + } + + public async Task StopAsync(CancellationToken cancellationToken) + { + if (this._app is not null) + { + await this._app.StopAsync(cancellationToken).ConfigureAwait(false); + } + } + + public async ValueTask DisposeAsync() + { + if (this._app is not null) + { + await this._app.DisposeAsync().ConfigureAwait(false); + this._app = null; + } + } + + /// + /// Loads the DevUI frontend resources from the Microsoft.Agents.AI.DevUI assembly. + /// The assembly embeds the Vite SPA build output as manifest resources. + /// Returns null if the assembly is not available. + /// + private static Dictionary? LoadFrontendResources(ILogger logger) + { + Assembly assembly; + try + { + assembly = Assembly.Load("Microsoft.Agents.AI.DevUI"); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Microsoft.Agents.AI.DevUI assembly not found. 
Frontend will be proxied from backends."); + return null; + } + + var prefix = $"{assembly.GetName().Name}.resources."; + var resources = new Dictionary(StringComparer.OrdinalIgnoreCase); + + foreach (var name in assembly.GetManifestResourceNames()) + { + if (!name.StartsWith(prefix, StringComparison.Ordinal)) + { + continue; + } + + // The DevUI middleware maps resource names by replacing dots with slashes. + // Both the key and lookup use the same transform, so they match. + var key = name[prefix.Length..].Replace('.', '/'); + s_contentTypeProvider.TryGetContentType(name, out var contentType); + resources[key] = (name, contentType ?? "application/octet-stream"); + } + + if (resources.Count == 0) + { + logger.LogWarning("Microsoft.Agents.AI.DevUI assembly loaded but contains no frontend resources"); + return null; + } + + logger.LogDebug("Loaded {Count} DevUI frontend resources from assembly", resources.Count); + return resources; + } + + /// + /// Serves the DevUI frontend. Uses embedded assembly resources if available, + /// otherwise falls back to proxying from the first backend agent service. + /// + private async Task ServeDevUIFrontendAsync(HttpContext context, string? path) + { + // Redirect /devui to /devui/ so relative URLs in the SPA resolve correctly + if (string.IsNullOrEmpty(path) && context.Request.Path.Value is { } reqPath && !reqPath.EndsWith('/')) + { + var redirect = reqPath + "/"; + if (context.Request.QueryString.HasValue) + { + redirect += context.Request.QueryString.Value; + } + + context.Response.StatusCode = StatusCodes.Status301MovedPermanently; + context.Response.Headers.Location = redirect; + return; + } + + // Try embedded resources first + if (this._frontendResources is not null) + { + var resourcePath = string.IsNullOrEmpty(path) ? 
"index.html" : path; + + if (await this.TryServeResourceAsync(context, resourcePath).ConfigureAwait(false)) + { + return; + } + + // SPA fallback: serve index.html for paths without a file extension (client-side routing) + if (!resourcePath.Contains('.', StringComparison.Ordinal) && + await this.TryServeResourceAsync(context, "index.html").ConfigureAwait(false)) + { + return; + } + + context.Response.StatusCode = StatusCodes.Status404NotFound; + return; + } + + // Fallback: proxy from the first backend that serves /devui + var backends = this.ResolveBackends(); + var firstBackendUrl = backends.Values.FirstOrDefault(); + + if (firstBackendUrl is null) + { + context.Response.StatusCode = StatusCodes.Status503ServiceUnavailable; + context.Response.ContentType = "text/plain"; + await context.Response.WriteAsync( + "DevUI: No agent service backends are available yet.", context.RequestAborted).ConfigureAwait(false); + return; + } + + var targetPath = string.IsNullOrEmpty(path) ? "/devui/" : $"/devui/{path}"; + await ProxyRequestAsync( + context, firstBackendUrl, targetPath + context.Request.QueryString, bodyBytes: null).ConfigureAwait(false); + } + + private async Task TryServeResourceAsync(HttpContext context, string resourcePath) + { + if (this._frontendResources is null) + { + return false; + } + + var key = resourcePath.Replace('.', '/'); + + if (!this._frontendResources.TryGetValue(key, out var entry)) + { + return false; + } + + Assembly assembly; + try + { + assembly = Assembly.Load("Microsoft.Agents.AI.DevUI"); + } + catch + { + return false; + } + + using var stream = assembly.GetManifestResourceStream(entry.ResourceName); + + if (stream is null) + { + return false; + } + + context.Response.ContentType = entry.ContentType; + context.Response.Headers.CacheControl = "no-cache, no-store"; + await stream.CopyToAsync(context.Response.Body, context.RequestAborted).ConfigureAwait(false); + return true; + } + + private static IResult GetMeta() + { + return 
Results.Json(new + { + ui_mode = "developer", + version = "0.1.0", + framework = "agent_framework", + runtime = "dotnet", + capabilities = new Dictionary + { + ["tracing"] = false, + ["openai_proxy"] = false, + ["deployment"] = false + }, + auth_required = false + }); + } + + private void MapRoutes(WebApplication app) + { + app.MapGet("/health", () => Results.Ok(new { status = "healthy" })); + + // Intercept API calls for multi-backend aggregation and routing + app.MapGet("/v1/entities", (Delegate)this.AggregateEntitiesAsync); + app.MapGet("/v1/entities/{**entityPath}", this.RouteEntityInfoAsync); + app.MapPost("/v1/responses", this.RouteResponsesAsync); + app.Map("/v1/conversations/{**path}", this.ProxyConversationsAsync); + app.MapGet("/meta", GetMeta); + + // Serve the DevUI frontend from embedded assembly resources + app.Map("/devui/{**path}", this.ServeDevUIFrontendAsync); + } + + /// + /// Resolves backend URLs from the resource's annotations. + /// This method does not cache results to ensure late-allocated backends are always discovered. + /// + private Dictionary ResolveBackends() + { + var result = new Dictionary(StringComparer.Ordinal); + + foreach (var annotation in this._resource.Annotations.OfType()) + { + if (annotation.AgentService is not IResourceWithEndpoints rwe) + { + continue; + } + + var prefix = annotation.EntityIdPrefix ?? annotation.AgentService.Name; + + try + { + var endpoint = rwe.GetEndpoint("http"); + if (endpoint.IsAllocated) + { + result[prefix] = endpoint.Url; + } + } + catch (Exception ex) + { + this._logger.LogDebug(ex, "Backend '{Prefix}' endpoint not yet available", prefix); + } + } + + return result; + } + + private async Task AggregateEntitiesAsync(HttpContext context) + { + var backends = this.ResolveBackends(); + var allEntities = new JsonArray(); + + foreach (var annotation in this._resource.Annotations.OfType()) + { + var prefix = annotation.EntityIdPrefix ?? 
annotation.AgentService.Name; + + if (annotation.Agents.Count > 0) + { + // Build entities from AppHost-declared metadata — no backend call needed + foreach (var agent in annotation.Agents) + { + allEntities.Add(new JsonObject + { + ["id"] = $"{prefix}/{agent.Id}", + ["type"] = agent.Type, + ["name"] = agent.Name, + ["description"] = agent.Description, + ["framework"] = agent.Framework, + ["_original_id"] = agent.Id, + ["_backend"] = prefix + }); + } + + continue; + } + + // Fallback: query backend /v1/entities for discovery + if (!backends.TryGetValue(prefix, out var baseUrl)) + { + continue; + } + + try + { + var httpClientFactory = context.RequestServices.GetRequiredService(); + using var client = httpClientFactory.CreateClient("devui-proxy"); + var response = await client.GetAsync( + new Uri(new Uri(baseUrl), "/v1/entities"), + context.RequestAborted).ConfigureAwait(false); + + if (!response.IsSuccessStatusCode) + { + this._logger.LogWarning( + "Failed to fetch entities from backend '{Prefix}' at {Url}: {Status}", + prefix, baseUrl, response.StatusCode); + continue; + } + + var json = await response.Content.ReadAsStringAsync(context.RequestAborted).ConfigureAwait(false); + var doc = JsonNode.Parse(json); + var entities = doc?["entities"]?.AsArray(); + + if (entities is null) + { + continue; + } + + foreach (var entity in entities) + { + if (entity is null) + { + continue; + } + + var cloned = entity.DeepClone(); + var id = cloned["id"]?.GetValue() ?? 
cloned["name"]?.GetValue(); + + if (id is not null) + { + cloned["id"] = $"{prefix}/{id}"; + cloned["_original_id"] = id; + cloned["_backend"] = prefix; + } + + allEntities.Add(cloned); + } + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + this._logger.LogWarning(ex, "Error fetching entities from backend '{Prefix}' at {Url}", prefix, baseUrl); + } + } + + return Results.Json(new { entities = allEntities }); + } + + private async Task RouteEntityInfoAsync(HttpContext context, string entityPath) + { + var (backendUrl, actualPath) = this.ResolveBackend(entityPath); + + if (backendUrl is null) + { + context.Response.StatusCode = StatusCodes.Status404NotFound; + return; + } + + var httpClientFactory = context.RequestServices.GetRequiredService(); + using var client = httpClientFactory.CreateClient("devui-proxy"); + var targetUrl = new Uri(new Uri(backendUrl), $"/v1/entities/{actualPath}"); + + using var response = await client.GetAsync(targetUrl, context.RequestAborted).ConfigureAwait(false); + await CopyResponseAsync(response, context).ConfigureAwait(false); + } + + private async Task RouteResponsesAsync(HttpContext context) + { + var bodyBytes = await ReadRequestBodyAsync(context.Request).ConfigureAwait(false); + var json = JsonNode.Parse(bodyBytes); + var entityId = json?["metadata"]?["entity_id"]?.GetValue(); + + if (entityId is null) + { + var firstBackend = this.ResolveBackends().Values.FirstOrDefault(); + if (firstBackend is null) + { + context.Response.StatusCode = StatusCodes.Status502BadGateway; + return; + } + + await ProxyRequestAsync(context, firstBackend, "/v1/responses", bodyBytes).ConfigureAwait(false); + return; + } + + var (backendUrl, actualEntityId) = this.ResolveBackend(entityId); + + if (backendUrl is null) + { + context.Response.StatusCode = StatusCodes.Status404NotFound; + await context.Response.WriteAsJsonAsync( + new { error = $"No backend found for entity '{entityId}'" }, + 
context.RequestAborted).ConfigureAwait(false); + return; + } + + // Rewrite entity_id to the un-prefixed original value + json!["metadata"]!["entity_id"] = actualEntityId; + var rewrittenBody = JsonSerializer.SerializeToUtf8Bytes(json); + + await ProxyRequestAsync(context, backendUrl, "/v1/responses", rewrittenBody, streaming: true).ConfigureAwait(false); + } + + private async Task ProxyConversationsAsync(HttpContext context, string? path) + { + // Try to determine the backend from agent_id query param or request body + string? backendUrl = null; + string? actualAgentId = null; + + var agentId = context.Request.Query["agent_id"].FirstOrDefault(); + if (agentId is not null) + { + (backendUrl, actualAgentId) = this.ResolveBackend(agentId); + } + + // Build query string with rewritten agent_id if we resolved from query param + var queryString = (agentId is not null && actualAgentId is not null) + ? RewriteAgentIdInQueryString(context.Request.QueryString, actualAgentId) + : context.Request.QueryString.ToString(); + + // Try conversation→backend map for previously-seen conversations + if (backendUrl is null) + { + var conversationId = ExtractConversationId(path); + if (conversationId is not null && this._conversationBackendMap.TryGetValue(conversationId, out var mappedUrl)) + { + backendUrl = mappedUrl; + } + } + + // Always read the request body when present so it isn't dropped during proxying + byte[]? bodyBytes = null; + if (context.Request.ContentLength > 0) + { + bodyBytes = await ReadRequestBodyAsync(context.Request).ConfigureAwait(false); + } + + // Try to resolve backend from request body metadata when not yet determined + if (backendUrl is null && bodyBytes is not null) + { + var json = JsonNode.Parse(bodyBytes); + var entityId = json?["metadata"]?["entity_id"]?.GetValue() + ?? 
json?["metadata"]?["agent_id"]?.GetValue(); + + if (entityId is not null) + { + string actualId; + (backendUrl, actualId) = this.ResolveBackend(entityId); + + if (backendUrl is not null) + { + // Rewrite the entity/agent id to the un-prefixed value + if (json?["metadata"]?["entity_id"] is not null) + { + json!["metadata"]!["entity_id"] = actualId; + } + + if (json?["metadata"]?["agent_id"] is not null) + { + json!["metadata"]!["agent_id"] = actualId; + } + + bodyBytes = JsonSerializer.SerializeToUtf8Bytes(json); + var targetPath = string.IsNullOrEmpty(path) ? "/v1/conversations" : $"/v1/conversations/{path}"; + + // Also rewrite query string agent_id if present + var bodyQueryString = (agentId is not null) + ? RewriteAgentIdInQueryString(context.Request.QueryString, actualId) + : context.Request.QueryString.ToString(); + + await this.ProxyAndRecordConversationAsync( + context, backendUrl, path, targetPath + bodyQueryString, bodyBytes).ConfigureAwait(false); + return; + } + } + + // Couldn't determine backend from body; proxy raw bytes to first backend + backendUrl = this.ResolveBackends().Values.FirstOrDefault(); + if (backendUrl is null) + { + context.Response.StatusCode = StatusCodes.Status502BadGateway; + return; + } + + var targetPathFallback = string.IsNullOrEmpty(path) ? "/v1/conversations" : $"/v1/conversations/{path}"; + await ProxyRequestAsync( + context, backendUrl, targetPathFallback + queryString, bodyBytes).ConfigureAwait(false); + return; + } + + // Route to resolved backend (from query or conversation map), or fall back to first backend + var backendKnown = backendUrl is not null; + backendUrl ??= this.ResolveBackends().Values.FirstOrDefault(); + if (backendUrl is null) + { + context.Response.StatusCode = StatusCodes.Status502BadGateway; + return; + } + + var convPath = string.IsNullOrEmpty(path) ? 
"/v1/conversations" : $"/v1/conversations/{path}"; + if (backendKnown) + { + await this.ProxyAndRecordConversationAsync( + context, backendUrl, path, convPath + queryString, bodyBytes).ConfigureAwait(false); + } + else + { + await ProxyRequestAsync( + context, backendUrl, convPath + queryString, bodyBytes).ConfigureAwait(false); + } + } + + /// + /// Rewrites the agent_id query parameter to the un-prefixed value for backend routing. + /// + internal static string RewriteAgentIdInQueryString(QueryString queryString, string actualAgentId) + { + if (!queryString.HasValue) + { + return string.Empty; + } + + var query = Microsoft.AspNetCore.WebUtilities.QueryHelpers.ParseQuery(queryString.Value); + query["agent_id"] = actualAgentId; + + return QueryString.Create(query).ToString(); + } + + private static string? ExtractConversationId(string? path) + { + if (string.IsNullOrEmpty(path)) + { + return null; + } + + var slashIndex = path.IndexOf('/'); + return slashIndex > 0 ? path[..slashIndex] : path; + } + + /// + /// Records the conversation→backend mapping and proxies the request. + /// For creation POSTs (no conversation ID in path), intercepts the response to capture the new ID. + /// + private async Task ProxyAndRecordConversationAsync( + HttpContext context, + string backendUrl, + string? conversationPath, + string targetUrl, + byte[]? 
bodyBytes) + { + var conversationId = ExtractConversationId(conversationPath); + if (conversationId is not null) + { + // We already know the conversation ID — record and proxy normally + this._conversationBackendMap[conversationId] = backendUrl; + await ProxyRequestAsync(context, backendUrl, targetUrl, bodyBytes).ConfigureAwait(false); + return; + } + + // Creation POST: intercept response to capture the new conversation ID + if (!context.Request.Method.Equals("POST", StringComparison.OrdinalIgnoreCase)) + { + await ProxyRequestAsync(context, backendUrl, targetUrl, bodyBytes).ConfigureAwait(false); + return; + } + + var originalBody = context.Response.Body; + using var buffer = new MemoryStream(); + context.Response.Body = buffer; + + try + { + await ProxyRequestAsync(context, backendUrl, targetUrl, bodyBytes).ConfigureAwait(false); + + if (context.Response.StatusCode is >= 200 and < 300) + { + buffer.Position = 0; + try + { + using var doc = await JsonDocument.ParseAsync( + buffer, cancellationToken: context.RequestAborted).ConfigureAwait(false); + if (doc.RootElement.TryGetProperty("id", out var idProp) && + idProp.ValueKind == JsonValueKind.String) + { + var createdId = idProp.GetString(); + if (createdId is not null) + { + this._conversationBackendMap[createdId] = backendUrl; + this._logger.LogDebug( + "Recorded conversation '{ConversationId}' → backend '{BackendUrl}'", + createdId, backendUrl); + } + } + } + catch + { + // Best-effort: response may not be parseable JSON + } + } + } + finally + { + context.Response.Body = originalBody; + buffer.Position = 0; + await buffer.CopyToAsync(originalBody, context.RequestAborted).ConfigureAwait(false); + } + } + + private static async Task ProxyRequestAsync( + HttpContext context, + string backendUrl, + string path, + byte[]? 
bodyBytes, + bool streaming = false) + { + var httpClientFactory = context.RequestServices.GetRequiredService(); + using var client = httpClientFactory.CreateClient("devui-proxy"); + + var targetUri = new Uri(new Uri(backendUrl), path); + using var request = new HttpRequestMessage(new HttpMethod(context.Request.Method), targetUri); + + foreach (var header in context.Request.Headers) + { + if (IsHopByHopHeader(header.Key)) + { + continue; + } + + request.Headers.TryAddWithoutValidation(header.Key, header.Value.ToArray()); + } + + if (bodyBytes is not null) + { + request.Content = new ByteArrayContent(bodyBytes); + if (context.Request.ContentType is not null) + { + request.Content.Headers.ContentType = + System.Net.Http.Headers.MediaTypeHeaderValue.Parse(context.Request.ContentType); + } + } + + var completionOption = streaming + ? HttpCompletionOption.ResponseHeadersRead + : HttpCompletionOption.ResponseContentRead; + + using var response = await client.SendAsync( + request, completionOption, context.RequestAborted).ConfigureAwait(false); + + if (streaming && response.Content.Headers.ContentType?.MediaType == "text/event-stream") + { + context.Response.StatusCode = (int)response.StatusCode; + context.Response.ContentType = "text/event-stream"; + context.Response.Headers.CacheControl = "no-cache"; + + using var stream = await response.Content.ReadAsStreamAsync(context.RequestAborted).ConfigureAwait(false); + await stream.CopyToAsync(context.Response.Body, context.RequestAborted).ConfigureAwait(false); + } + else + { + await CopyResponseAsync(response, context).ConfigureAwait(false); + } + } + + private (string? 
BackendUrl, string ActualPath) ResolveBackend(string prefixedId) + { + var backends = this.ResolveBackends(); + var slashIndex = prefixedId.IndexOf('/'); + + if (slashIndex > 0) + { + var prefix = prefixedId[..slashIndex]; + var rest = prefixedId[(slashIndex + 1)..]; + + if (backends.TryGetValue(prefix, out var url)) + { + return (url, rest); + } + } + + // Fallback: check all prefixes + foreach (var (prefix, url) in backends) + { + if (prefixedId.StartsWith(prefix + "/", StringComparison.Ordinal)) + { + return (url, prefixedId[(prefix.Length + 1)..]); + } + } + + return (null, prefixedId); + } + + private static async Task ReadRequestBodyAsync(HttpRequest request) + { + using var ms = new MemoryStream(); + await request.Body.CopyToAsync(ms).ConfigureAwait(false); + return ms.ToArray(); + } + + private static async Task CopyResponseAsync(HttpResponseMessage response, HttpContext context) + { + context.Response.StatusCode = (int)response.StatusCode; + + foreach (var header in response.Headers.Where(h => !IsHopByHopHeader(h.Key))) + { + context.Response.Headers[header.Key] = header.Value.ToArray(); + } + + foreach (var header in response.Content.Headers) + { + context.Response.Headers[header.Key] = header.Value.ToArray(); + } + + await response.Content.CopyToAsync(context.Response.Body).ConfigureAwait(false); + } + + private static bool IsHopByHopHeader(string headerName) + { + return headerName.Equals("Transfer-Encoding", StringComparison.OrdinalIgnoreCase) + || headerName.Equals("Connection", StringComparison.OrdinalIgnoreCase) + || headerName.Equals("Keep-Alive", StringComparison.OrdinalIgnoreCase) + || headerName.Equals("Host", StringComparison.OrdinalIgnoreCase); + } +} diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIResource.cs b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIResource.cs new file mode 100644 index 0000000000..9cf85dff07 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/DevUIResource.cs @@ -0,0 +1,49 @@ +// 
Copyright (c) Microsoft. All rights reserved. + +using System.Net.Sockets; + +namespace Aspire.Hosting.ApplicationModel; + +/// +/// Represents a DevUI resource for testing AI agents in a distributed application. +/// +/// +/// DevUI aggregates agents from multiple backend services and provides a unified +/// web interface for testing and debugging AI agents using the OpenAI Responses protocol. +/// The aggregator runs as an in-process reverse proxy within the AppHost, requiring no +/// external container image. +/// +/// The name of the DevUI resource. +public class DevUIResource(string name) : Resource(name), IResourceWithEndpoints, IResourceWithWaitSupport +{ + internal const string PrimaryEndpointName = "http"; + + /// + /// Initializes a new instance of the class with endpoint annotations. + /// + /// The name of the resource. + /// An optional fixed port. If null, a dynamic port is assigned. + internal DevUIResource(string name, int? port) : this(name) + { + this.Port = port; + this.Annotations.Add(new EndpointAnnotation( + ProtocolType.Tcp, + uriScheme: "http", + name: PrimaryEndpointName, + port: port, + isProxied: false) + { + TargetHost = "localhost" + }); + } + + /// + /// Gets the optional fixed port for the DevUI web interface. + /// + internal int? Port { get; } + + /// + /// Gets the primary HTTP endpoint for the DevUI web interface. 
+ /// + public EndpointReference PrimaryEndpoint => field ??= new(this, PrimaryEndpointName); +} diff --git a/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/README.md b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/README.md new file mode 100644 index 0000000000..8dbace2514 --- /dev/null +++ b/dotnet/src/Aspire.Hosting.AgentFramework.DevUI/README.md @@ -0,0 +1,104 @@ +# Aspire.Hosting.AgentFramework.DevUI library + +Provides extension methods and resource definitions for an Aspire AppHost to configure a DevUI resource for testing and debugging AI agents built with [Microsoft Agent Framework](https://github.com/microsoft/agent-framework). + +## Getting started + +### Prerequisites + +Agent services must expose the OpenAI Responses and Conversations API endpoints. This is compatible with services using [Microsoft Agent Framework](https://github.com/microsoft/agent-framework) with `MapOpenAIResponses()` and `MapOpenAIConversations()` mapped. + +### Install the package + +In your AppHost project, install the Aspire Agent Framework DevUI Hosting library with [NuGet](https://www.nuget.org): + +```dotnetcli +dotnet add package Aspire.Hosting.AgentFramework.DevUI +``` + +## Usage example + +Then, in the _AppHost.cs_ file of `AppHost`, add a DevUI resource and connect it to your agent services using the following methods: + +```csharp +var writerAgent = builder.AddProject("writer-agent") + .WithHttpHealthCheck("/health"); + +var editorAgent = builder.AddProject("editor-agent") + .WithHttpHealthCheck("/health"); + +var devui = builder.AddDevUI("devui") + .WithAgentService(writerAgent) + .WithAgentService(editorAgent) + .WaitFor(writerAgent) + .WaitFor(editorAgent); +``` + +Each agent service only needs to map the standard OpenAI API endpoints — no custom discovery endpoints are required: + +```csharp +// In the agent service's Program.cs +builder.AddAIAgent("writer", "You write short stories."); +builder.Services.AddOpenAIResponses(); 
+builder.Services.AddOpenAIConversations(); + +var app = builder.Build(); + +app.MapOpenAIResponses(); +app.MapOpenAIConversations(); +``` + +## How it works + +`AddDevUI` starts an **in-process aggregator** inside the AppHost — no external container image is needed. The aggregator is a lightweight Kestrel server that: + +1. **Serves the DevUI frontend** from the `Microsoft.Agents.AI.DevUI` assembly's embedded resources (loaded at runtime). If the assembly is not available, it falls back to proxying the frontend from the first backend. +2. **Aggregates entities** from all configured agent service backends into a single `/v1/entities` listing. Each entity ID is prefixed with the backend name to ensure uniqueness across services (e.g., `writer-agent/writer`, `editor-agent/editor`). +3. **Routes requests** to the correct backend based on the entity ID prefix. When DevUI sends a `POST /v1/responses` or `/v1/conversations` request, the aggregator strips the prefix and forwards it to the appropriate service. +4. **Streams SSE responses** for the `/v1/responses` endpoint, so agent responses stream back to the DevUI frontend in real time. + +The aggregator publishes its URL to the Aspire dashboard, where it appears as a clickable link. + +## Agent discovery + +By default, `WithAgentService` declares a single agent named after the Aspire resource. You can provide explicit agent metadata when the agent name differs from the resource name, or when a service hosts multiple agents: + +```csharp +builder.AddDevUI("devui") + .WithAgentService(writerAgent, agents: [new("writer", "Writes short stories")]) + .WithAgentService(editorAgent, agents: [new("editor", "Edits and formats stories")]); +``` + +Agent metadata is declared at the AppHost level so the aggregator builds the entity listing directly — agent services don't need a `/v1/entities` endpoint. + +## Configuration + +### Custom entity ID prefix + +By default, entity IDs are prefixed with the Aspire resource name. 
You can specify a custom prefix: + +```csharp +builder.AddDevUI("devui") + .WithAgentService(myService, entityIdPrefix: "custom-prefix"); +``` + +### Custom port + +You can specify a fixed host port for the DevUI web interface: + +```csharp +builder.AddDevUI("devui", port: 8090); +``` + +### DevUI frontend assembly + +To serve the DevUI frontend directly from the aggregator (instead of proxying from a backend), add the `Microsoft.Agents.AI.DevUI` NuGet package to your AppHost project. The aggregator loads its embedded resources at runtime via `Assembly.Load`. + +## Additional documentation + +* https://github.com/microsoft/agent-framework +* https://github.com/microsoft/agent-framework/tree/main/dotnet/src/Microsoft.Agents.AI.DevUI + +## Feedback & contributing + +https://github.com/dotnet/aspire diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/Kit/ActionExecutorResult.cs b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/Kit/ActionExecutorResult.cs index 99d2e29f50..4bf2a12500 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/Kit/ActionExecutorResult.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/Kit/ActionExecutorResult.cs @@ -1,5 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. +using Microsoft.Agents.AI.Workflows.Declarative.Extensions; + namespace Microsoft.Agents.AI.Workflows.Declarative.Kit; /// @@ -25,6 +27,11 @@ public sealed record class ActionExecutorResult internal static ActionExecutorResult ThrowIfNot(object? message) { + if (message is PortableValue portableValue && portableValue.IsType(out ActionExecutorResult? unwrapped)) + { + return unwrapped; + } + if (message is not ActionExecutorResult executorMessage) { throw new DeclarativeActionException($"Unexpected message type: {message?.GetType().Name ?? 
"(null)"} (Expected: {nameof(ActionExecutorResult)})"); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeAzureAgentExecutor.cs b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeAzureAgentExecutor.cs index 24653af0f2..86efa6ec43 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeAzureAgentExecutor.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeAzureAgentExecutor.cs @@ -27,9 +27,11 @@ internal sealed class InvokeAzureAgentExecutor(InvokeAzureAgent model, ResponseA public static string Resume(string id) => $"{id}_{nameof(Resume)}"; } - public static bool RequiresInput(object? message) => message is ExternalInputRequest; + public static bool RequiresInput(object? message) => + message is ExternalInputRequest || (message is PortableValue pv && pv.IsType(out ExternalInputRequest? _)); - public static bool RequiresNothing(object? message) => message is ActionExecutorResult; + public static bool RequiresNothing(object? message) => + message is ActionExecutorResult || (message is PortableValue pv && pv.IsType(out ActionExecutorResult? _)); private AzureAgentUsage AgentUsage => Throw.IfNull(this.Model.Agent, $"{nameof(this.Model)}.{nameof(this.Model.Agent)}"); private AzureAgentInput? AgentInput => this.Model.Input; diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeMcpToolExecutor.cs b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeMcpToolExecutor.cs index b1d9a44269..7540556f64 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeMcpToolExecutor.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows.Declarative/ObjectModel/InvokeMcpToolExecutor.cs @@ -46,12 +46,14 @@ internal sealed class InvokeMcpToolExecutor( /// /// Determines if the message indicates external input is required. /// - public static bool RequiresInput(object? 
message) => message is ExternalInputRequest; + public static bool RequiresInput(object? message) => + message is ExternalInputRequest || (message is PortableValue pv && pv.IsType(out ExternalInputRequest? _)); /// /// Determines if the message indicates no external input is required. /// - public static bool RequiresNothing(object? message) => message is ActionExecutorResult; + public static bool RequiresNothing(object? message) => + message is ActionExecutorResult || (message is PortableValue pv && pv.IsType(out ActionExecutorResult? _)); /// protected override bool EmitResultEvent => false; diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/AIAgentsAbstractionsExtensions.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/AIAgentsAbstractionsExtensions.cs index 165de39855..8c94f4aa85 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/AIAgentsAbstractionsExtensions.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/AIAgentsAbstractionsExtensions.cs @@ -48,7 +48,7 @@ internal static class AIAgentsAbstractionsExtensions /// any that have a different from to /// . /// - public static List? ChangeAssistantToUserForOtherParticipants(this List messages, string targetAgentName) + public static List? ChangeAssistantToUserForOtherParticipants(this IEnumerable messages, string targetAgentName) { List? roleChanged = null; foreach (var m in messages) diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/HandoffWorkflowBuilder.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/HandoffWorkflowBuilder.cs index 4c93414c63..ba21f9322b 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/HandoffWorkflowBuilder.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/HandoffWorkflowBuilder.cs @@ -219,13 +219,17 @@ public class HandoffWorkflowBuilderCore where TBuilder : HandoffWorkfl if (string.IsNullOrWhiteSpace(handoffReason)) { - handoffReason = to.Description ?? to.Name ?? (to as ChatClientAgent)?.Instructions; + handoffReason = (string.IsNullOrWhiteSpace(to.Description) ? 
null : to.Description) + ?? (string.IsNullOrWhiteSpace(to.Name) ? null : $"handoff to {to.Name}") + ?? to.GetService()?.Instructions; + if (string.IsNullOrWhiteSpace(handoffReason)) { Throw.ArgumentException( nameof(to), - $"The provided target agent '{to.Name ?? to.Id}' has no description, name, or instructions, and no handoff description has been provided. " + - "At least one of these is required to register a handoff so that the appropriate target agent can be chosen."); + $"The provided target agent '{(string.IsNullOrWhiteSpace(to.Name) ? to.Id : to.Name)}' has no description, name, or instructions, and no " + + "handoff description has been provided. At least one of these is required to register a handoff so that the appropriate target agent can " + + "be chosen."); } } diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffAgentExecutor.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffAgentExecutor.cs index eac2eb5687..d9acab96d5 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffAgentExecutor.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffAgentExecutor.cs @@ -6,6 +6,7 @@ using System.ComponentModel; using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Text.Json; +using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.AI; @@ -31,140 +32,6 @@ internal sealed class HandoffAgentExecutorOptions public HandoffToolCallFilteringBehavior ToolCallFilteringBehavior { get; set; } = HandoffToolCallFilteringBehavior.HandoffOnly; } -[Experimental(DiagnosticConstants.ExperimentalFeatureDiagnostic)] -internal sealed class HandoffMessagesFilter -{ - private readonly HandoffToolCallFilteringBehavior _filteringBehavior; - - public HandoffMessagesFilter(HandoffToolCallFilteringBehavior filteringBehavior) - { - this._filteringBehavior = filteringBehavior; - } - - 
[Experimental(DiagnosticConstants.ExperimentalFeatureDiagnostic)] - internal static bool IsHandoffFunctionName(string name) - { - return name.StartsWith(HandoffWorkflowBuilder.FunctionPrefix, StringComparison.Ordinal); - } - - public IEnumerable FilterMessages(List messages) - { - if (this._filteringBehavior == HandoffToolCallFilteringBehavior.None) - { - return messages; - } - - Dictionary filteringCandidates = new(); - List filteredMessages = []; - HashSet messagesToRemove = []; - - bool filterHandoffOnly = this._filteringBehavior == HandoffToolCallFilteringBehavior.HandoffOnly; - foreach (ChatMessage unfilteredMessage in messages) - { - ChatMessage filteredMessage = unfilteredMessage.Clone(); - - // .Clone() is shallow, so we cannot modify the contents of the cloned message in place. - List contents = []; - contents.Capacity = unfilteredMessage.Contents?.Count ?? 0; - filteredMessage.Contents = contents; - - // Because this runs after the role changes from assistant to user for the target agent, we cannot rely on tool calls - // originating only from messages with the Assistant role. Instead, we need to inspect the contents of all non-Tool (result) - // FunctionCallContent. - if (unfilteredMessage.Role != ChatRole.Tool) - { - for (int i = 0; i < unfilteredMessage.Contents!.Count; i++) - { - AIContent content = unfilteredMessage.Contents[i]; - if (content is not FunctionCallContent fcc || (filterHandoffOnly && !IsHandoffFunctionName(fcc.Name))) - { - filteredMessage.Contents.Add(content); - - // Track non-handoff function calls so their tool results are preserved in HandoffOnly mode - if (filterHandoffOnly && content is FunctionCallContent nonHandoffFcc) - { - filteringCandidates[nonHandoffFcc.CallId] = new FilterCandidateState(nonHandoffFcc.CallId) - { - IsHandoffFunction = false, - }; - } - } - else if (filterHandoffOnly) - { - if (!filteringCandidates.TryGetValue(fcc.CallId, out FilterCandidateState? 
candidateState)) - { - filteringCandidates[fcc.CallId] = new FilterCandidateState(fcc.CallId) - { - IsHandoffFunction = true, - }; - } - else - { - candidateState.IsHandoffFunction = true; - (int messageIndex, int contentIndex) = candidateState.FunctionCallResultLocation!.Value; - ChatMessage messageToFilter = filteredMessages[messageIndex]; - messageToFilter.Contents.RemoveAt(contentIndex); - if (messageToFilter.Contents.Count == 0) - { - messagesToRemove.Add(messageIndex); - } - } - } - else - { - // All mode: strip all FunctionCallContent - } - } - } - else - { - if (!filterHandoffOnly) - { - continue; - } - - for (int i = 0; i < unfilteredMessage.Contents!.Count; i++) - { - AIContent content = unfilteredMessage.Contents[i]; - if (content is not FunctionResultContent frc - || (filteringCandidates.TryGetValue(frc.CallId, out FilterCandidateState? candidateState) - && candidateState.IsHandoffFunction is false)) - { - // Either this is not a function result content, so we should let it through, or it is a FRC that - // we know is not related to a handoff call. In either case, we should include it. - filteredMessage.Contents.Add(content); - } - else if (candidateState is null) - { - // We haven't seen the corresponding function call yet, so add it as a candidate to be filtered later - filteringCandidates[frc.CallId] = new FilterCandidateState(frc.CallId) - { - FunctionCallResultLocation = (filteredMessages.Count, filteredMessage.Contents.Count), - }; - } - // else we have seen the corresponding function call and it is a handoff, so we should filter it out. - } - } - - if (filteredMessage.Contents.Count > 0) - { - filteredMessages.Add(filteredMessage); - } - } - - return filteredMessages.Where((_, index) => !messagesToRemove.Contains(index)); - } - - private class FilterCandidateState(string callId) - { - public (int MessageIndex, int ContentIndex)? FunctionCallResultLocation { get; set; } - - public string CallId => callId; - - public bool? 
IsHandoffFunction { get; set; } - } -} - internal struct AgentInvocationResult(AgentResponse agentResponse, string? handoffTargetId) { public AgentResponse Response => agentResponse; @@ -175,19 +42,31 @@ internal struct AgentInvocationResult(AgentResponse agentResponse, string? hando public bool IsHandoffRequested => this.HandoffTargetId != null; } -internal record HandoffAgentHostState(HandoffState? CurrentTurnState, List FilteredIncomingMessages, List TurnMessages) +internal record HandoffAgentHostState( + HandoffState? IncomingState, + int ConversationBookmark) { - public HandoffState PrepareHandoff(AgentInvocationResult invocationResult, string currentAgentId) - { - if (this.CurrentTurnState == null) - { - throw new InvalidOperationException("Cannot create a handoff request: Out of turn."); - } + [MemberNotNullWhen(true, nameof(IncomingState))] + [JsonIgnore] + public bool IsTakingTurn => this.IncomingState != null; +} - IEnumerable allMessages = [.. this.CurrentTurnState.Messages, .. this.TurnMessages, .. invocationResult.Response.Messages]; +internal sealed record StateRef(string Key, string? ScopeName) +{ + public ValueTask InvokeWithStateAsync(Func> invocation, + IWorkflowContext context, + CancellationToken cancellationToken) + => context.InvokeWithStateAsync(invocation, this.Key, this.ScopeName, cancellationToken); - return new(this.CurrentTurnState.TurnToken, invocationResult.HandoffTargetId, allMessages.ToList(), currentAgentId); - } + public ValueTask InvokeWithStateAsync(Func invocation, + IWorkflowContext context, + CancellationToken cancellationToken) + => context.InvokeWithStateAsync( + async (state, ctx, ct) => + { + await invocation(state, ctx, ct).ConfigureAwait(false); + return state; + }, this.Key, this.ScopeName, cancellationToken); } /// Executor used to represent an agent in a handoffs workflow, responding to events. 
@@ -208,7 +87,13 @@ internal sealed class HandoffAgentExecutor : private readonly HashSet _handoffFunctionNames = []; private readonly Dictionary _handoffFunctionToAgentId = []; - private static HandoffAgentHostState InitialStateFactory() => new(null, [], []); + private readonly StateRef _sharedStateRef = new(HandoffConstants.HandoffSharedStateKey, + HandoffConstants.HandoffSharedStateScope); + + internal const string AgentSessionKey = nameof(AgentSession); + private AgentSession? _session; + + private static HandoffAgentHostState InitialStateFactory() => new(null, 0); public HandoffAgentExecutor(AIAgent agent, HashSet handoffs, HandoffAgentExecutorOptions options) : base(IdFor(agent), InitialStateFactory) @@ -291,13 +176,18 @@ internal sealed class HandoffAgentExecutor : // resumes can be processed in one invocation. return this.InvokeWithStateAsync((state, ctx, ct) => { - state.TurnMessages.Add(new ChatMessage(ChatRole.User, [response]) + if (!state.IsTakingTurn) + { + throw new InvalidOperationException("Cannot process user responses when not taking a turn in Handoff Orchestration."); + } + + ChatMessage userMessage = new(ChatRole.User, [response]) { CreatedAt = DateTimeOffset.UtcNow, MessageId = Guid.NewGuid().ToString("N"), - }); + }; - return this.ContinueTurnAsync(state, ctx, ct); + return this.ContinueTurnAsync(state, [userMessage], ctx, ct); }, context, skipCache: false, cancellationToken); } @@ -315,24 +205,44 @@ internal sealed class HandoffAgentExecutor : // resumes can be processed in one invocation. return this.InvokeWithStateAsync((state, ctx, ct) => { - state.TurnMessages.Add( - new ChatMessage(ChatRole.Tool, [result]) - { - AuthorName = this._agent.Name ?? 
this._agent.Id, - CreatedAt = DateTimeOffset.UtcNow, - MessageId = Guid.NewGuid().ToString("N"), - }); + if (!state.IsTakingTurn) + { + throw new InvalidOperationException("Cannot process user responses in when not taking a turn in Handoff Orchestration."); + } - return this.ContinueTurnAsync(state, ctx, ct); + ChatMessage toolMessage = new(ChatRole.Tool, [result]) + { + AuthorName = this._agent.Name ?? this._agent.Id, + CreatedAt = DateTimeOffset.UtcNow, + MessageId = Guid.NewGuid().ToString("N"), + }; + + return this.ContinueTurnAsync(state, [toolMessage], ctx, ct); }, context, skipCache: false, cancellationToken); } - private async ValueTask ContinueTurnAsync(HandoffAgentHostState state, IWorkflowContext context, CancellationToken cancellationToken) + private async ValueTask ContinueTurnAsync(HandoffAgentHostState state, List incomingMessages, IWorkflowContext context, CancellationToken cancellationToken, bool skipAddIncoming = false) { - List? roleChanges = state.FilteredIncomingMessages.ChangeAssistantToUserForOtherParticipants(this._agent.Name ?? this._agent.Id); + if (!state.IsTakingTurn) + { + throw new InvalidOperationException("Cannot process user responses in when not taking a turn in Handoff Orchestration."); + } - bool emitUpdateEvents = state.CurrentTurnState!.ShouldEmitStreamingEvents(this._options.EmitAgentResponseUpdateEvents); - AgentInvocationResult result = await this.InvokeAgentAsync([.. state.FilteredIncomingMessages, .. state.TurnMessages], context, emitUpdateEvents, cancellationToken) + // If a handoff was invoked by a previous agent, filter out the handoff function call and tool result messages + // before sending to the underlying agent. These are internal workflow mechanics that confuse the target model + // into ignoring the original user question. 
+ // + // This will not filter out tool responses and approval responses that are part of this agent's turn, which is + // the expected behavior since those are part of the agent's reasoning process. + HandoffMessagesFilter handoffMessagesFilter = new(this._options.ToolCallFilteringBehavior); + IEnumerable messagesForAgent = state.IncomingState.RequestedHandoffTargetAgentId is not null + ? handoffMessagesFilter.FilterMessages(incomingMessages) + : incomingMessages; + + List? roleChanges = messagesForAgent.ChangeAssistantToUserForOtherParticipants(this._agent.Name ?? this._agent.Id); + + bool emitUpdateEvents = state.IncomingState!.ShouldEmitStreamingEvents(this._options.EmitAgentResponseUpdateEvents); + AgentInvocationResult result = await this.InvokeAgentAsync(messagesForAgent, context, emitUpdateEvents, cancellationToken) .ConfigureAwait(false); if (this.HasOutstandingRequests && result.IsHandoffRequested) @@ -342,20 +252,40 @@ internal sealed class HandoffAgentExecutor : roleChanges.ResetUserToAssistantForChangedRoles(); + int newConversationBookmark = state.ConversationBookmark; + await this._sharedStateRef.InvokeWithStateAsync( + (sharedState, ctx, ct) => + { + if (sharedState == null) + { + throw new InvalidOperationException("Handoff Orchestration shared state was not properly initialized."); + } + + if (!skipAddIncoming) + { + sharedState.Conversation.AddMessages(incomingMessages); + } + + newConversationBookmark = sharedState.Conversation.AddMessages(result.Response.Messages); + + return new ValueTask(); + }, + context, + cancellationToken).ConfigureAwait(false); + // We send on the HandoffState even if handoff is not requested because we might be terminating the processing, but this only // happens if we have no outstanding requests. 
if (!this.HasOutstandingRequests) { - HandoffState outgoingState = state.PrepareHandoff(result, this._agent.Id); + HandoffState outgoingState = new(state.IncomingState.TurnToken, result.HandoffTargetId, this._agent.Id); await context.SendMessageAsync(outgoingState, cancellationToken).ConfigureAwait(false); - // reset the state for the next handoff (return-to-current is modeled as a new handoff turn, as opposed to "HITL", which - // can be a bit confusing.) - return null; + // reset the state for the next handoff, making sure to keep track of the conversation bookmark, and avoid resetting the + // agent session. (return-to-current is modeled as a new handoff turn, as opposed to "HITL", which can be a bit confusing.) + return state with { IncomingState = null, ConversationBookmark = newConversationBookmark }; } - state.TurnMessages.AddRange(result.Response.Messages); return state; } @@ -363,28 +293,36 @@ internal sealed class HandoffAgentExecutor : { return this.InvokeWithStateAsync(InvokeContinueTurnAsync, context, skipCache: false, cancellationToken); - ValueTask InvokeContinueTurnAsync(HandoffAgentHostState state, IWorkflowContext context, CancellationToken cancellationToken) + async ValueTask InvokeContinueTurnAsync(HandoffAgentHostState state, IWorkflowContext context, CancellationToken cancellationToken) { // Check that we are not getting this message while in the middle of a turn - if (state.CurrentTurnState != null) + if (state.IsTakingTurn) { throw new InvalidOperationException("Cannot have multiple simultaneous conversations in Handoff Orchestration."); } - // If a handoff was invoked by a previous agent, filter out the handoff function - // call and tool result messages before sending to the underlying agent. These - // are internal workflow mechanics that confuse the target model into ignoring the - // original user question. 
- HandoffMessagesFilter handoffMessagesFilter = new(this._options.ToolCallFilteringBehavior); - IEnumerable messagesForAgent = message.RequestedHandoffTargetAgentId is not null - ? handoffMessagesFilter.FilterMessages(message.Messages) - : message.Messages; + IEnumerable newConversationMessages = []; + int newConversationBookmark = 0; - // This works because the runtime guarantees that a given executor instance will process messages serially, - // though there is no global cross-executor ordering guarantee (and in turn, no canonical message delivery order) - state = new(message, messagesForAgent.ToList(), []); + await this._sharedStateRef.InvokeWithStateAsync( + (sharedState, ctx, ct) => + { + if (sharedState == null) + { + throw new InvalidOperationException("Handoff Orchestration shared state was not properly initialized."); + } - return this.ContinueTurnAsync(state, context, cancellationToken); + (newConversationMessages, newConversationBookmark) = sharedState.Conversation.CollectNewMessages(state.ConversationBookmark); + + return new ValueTask(); + }, + context, + cancellationToken).ConfigureAwait(false); + + state = state with { IncomingState = message, ConversationBookmark = newConversationBookmark }; + + return await this.ContinueTurnAsync(state, newConversationMessages.ToList(), context, cancellationToken, skipAddIncoming: true) + .ConfigureAwait(false); } } @@ -395,18 +333,35 @@ internal sealed class HandoffAgentExecutor : { Task userInputRequestsTask = this._userInputHandler?.OnCheckpointingAsync(UserInputRequestStateKey, context, cancellationToken).AsTask() ?? Task.CompletedTask; Task functionCallRequestsTask = this._functionCallHandler?.OnCheckpointingAsync(FunctionCallRequestStateKey, context, cancellationToken).AsTask() ?? 
Task.CompletedTask; + Task agentSessionTask = CheckpointAgentSessionAsync(); Task baseTask = base.OnCheckpointingAsync(context, cancellationToken).AsTask(); - await Task.WhenAll(userInputRequestsTask, functionCallRequestsTask, baseTask).ConfigureAwait(false); + await Task.WhenAll(userInputRequestsTask, functionCallRequestsTask, agentSessionTask, baseTask).ConfigureAwait(false); + + async Task CheckpointAgentSessionAsync() + { + JsonElement? sessionState = this._session is not null ? await this._agent.SerializeSessionAsync(this._session, cancellationToken: cancellationToken).ConfigureAwait(false) : null; + await context.QueueStateUpdateAsync(AgentSessionKey, sessionState, cancellationToken: cancellationToken).ConfigureAwait(false); + } } protected internal override async ValueTask OnCheckpointRestoredAsync(IWorkflowContext context, CancellationToken cancellationToken = default) { Task userInputRestoreTask = this._userInputHandler?.OnCheckpointRestoredAsync(UserInputRequestStateKey, context, cancellationToken).AsTask() ?? Task.CompletedTask; Task functionCallRestoreTask = this._functionCallHandler?.OnCheckpointRestoredAsync(FunctionCallRequestStateKey, context, cancellationToken).AsTask() ?? Task.CompletedTask; + Task agentSessionTask = RestoreAgentSessionAsync(); - await Task.WhenAll(userInputRestoreTask, functionCallRestoreTask).ConfigureAwait(false); + await Task.WhenAll(userInputRestoreTask, functionCallRestoreTask, agentSessionTask).ConfigureAwait(false); await base.OnCheckpointRestoredAsync(context, cancellationToken).ConfigureAwait(false); + + async Task RestoreAgentSessionAsync() + { + JsonElement? 
sessionState = await context.ReadStateAsync(AgentSessionKey, cancellationToken: cancellationToken).ConfigureAwait(false); + if (sessionState.HasValue) + { + this._session = await this._agent.DeserializeSessionAsync(sessionState.Value, cancellationToken: cancellationToken).ConfigureAwait(false); + } + } } private bool HasOutstandingRequests => (this._userInputHandler?.HasPendingRequests == true) || (this._functionCallHandler?.HasPendingRequests == true); @@ -417,31 +372,43 @@ internal sealed class HandoffAgentExecutor : AIAgentUnservicedRequestsCollector collector = new(this._userInputHandler, this._functionCallHandler); - IAsyncEnumerable agentStream = this._agent.RunStreamingAsync( - messages, - options: this._agentOptions, - cancellationToken: cancellationToken); - string? requestedHandoff = null; List updates = []; List candidateRequests = []; - await foreach (AgentResponseUpdate update in agentStream.ConfigureAwait(false)) - { - await AddUpdateAsync(update, cancellationToken).ConfigureAwait(false); - collector.ProcessAgentResponseUpdate(update, CollectHandoffRequestsFilter); - - bool CollectHandoffRequestsFilter(FunctionCallContent candidateHandoffRequest) + await this.InvokeWithStateAsync( + async (state, ctx, ct) => { - bool isHandoffRequest = this._handoffFunctionNames.Contains(candidateHandoffRequest.Name); - if (isHandoffRequest) + this._session ??= await this._agent.CreateSessionAsync(ct).ConfigureAwait(false); + + IAsyncEnumerable agentStream = + this._agent.RunStreamingAsync(messages, + this._session, + options: this._agentOptions, + cancellationToken: ct); + + await foreach (AgentResponseUpdate update in agentStream.ConfigureAwait(false)) { - candidateRequests.Add(candidateHandoffRequest); + await AddUpdateAsync(update, ct).ConfigureAwait(false); + + collector.ProcessAgentResponseUpdate(update, CollectHandoffRequestsFilter); + + bool CollectHandoffRequestsFilter(FunctionCallContent candidateHandoffRequest) + { + bool isHandoffRequest = 
this._handoffFunctionNames.Contains(candidateHandoffRequest.Name); + if (isHandoffRequest) + { + candidateRequests.Add(candidateHandoffRequest); + } + + return !isHandoffRequest; + } } - return !isHandoffRequest; - } - } + return state; + }, + context, + cancellationToken: cancellationToken).ConfigureAwait(false); if (candidateRequests.Count > 1) { diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffEndExecutor.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffEndExecutor.cs index 0ba8fc3501..c9c75b91c4 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffEndExecutor.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffEndExecutor.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; @@ -12,23 +13,33 @@ internal sealed class HandoffEndExecutor(bool returnToPrevious) : Executor(Execu { public const string ExecutorId = "HandoffEnd"; + private readonly StateRef _sharedStateRef = new(HandoffConstants.HandoffSharedStateKey, + HandoffConstants.HandoffSharedStateScope); + protected override ProtocolBuilder ConfigureProtocol(ProtocolBuilder protocolBuilder) => - protocolBuilder.ConfigureRoutes(routeBuilder => routeBuilder.AddHandler((handoff, context, cancellationToken) => - this.HandleAsync(handoff, context, cancellationToken))) + protocolBuilder.ConfigureRoutes(routeBuilder => routeBuilder.AddHandler( + (handoff, context, cancellationToken) => this.HandleAsync(handoff, context, cancellationToken))) .YieldsOutput>(); private async ValueTask HandleAsync(HandoffState handoff, IWorkflowContext context, CancellationToken cancellationToken) { - if (returnToPrevious) - { - await context.QueueStateUpdateAsync(HandoffConstants.PreviousAgentTrackerKey, - handoff.PreviousAgentId, - HandoffConstants.PreviousAgentTrackerScope, - cancellationToken) - .ConfigureAwait(false); - } + await 
this._sharedStateRef.InvokeWithStateAsync( + async (HandoffSharedState? sharedState, IWorkflowContext context, CancellationToken cancellationToken) => + { + if (sharedState == null) + { + throw new InvalidOperationException("Handoff Orchestration shared state was not properly initialized."); + } - await context.YieldOutputAsync(handoff.Messages, cancellationToken).ConfigureAwait(false); + if (returnToPrevious) + { + sharedState.PreviousAgentId = handoff.PreviousAgentId; + } + + await context.YieldOutputAsync(sharedState.Conversation.CloneAllMessages(), cancellationToken).ConfigureAwait(false); + + return sharedState; + }, context, cancellationToken).ConfigureAwait(false); } public ValueTask ResetAsync() => default; diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffMessagesFilter.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffMessagesFilter.cs new file mode 100644 index 0000000000..7bc178c2d4 --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffMessagesFilter.cs @@ -0,0 +1,143 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using Microsoft.Extensions.AI; + +namespace Microsoft.Agents.AI.Workflows.Specialized; + +[Experimental(DiagnosticConstants.ExperimentalFeatureDiagnostic)] +internal sealed class HandoffMessagesFilter +{ + private readonly HandoffToolCallFilteringBehavior _filteringBehavior; + + public HandoffMessagesFilter(HandoffToolCallFilteringBehavior filteringBehavior) + { + this._filteringBehavior = filteringBehavior; + } + + [Experimental(DiagnosticConstants.ExperimentalFeatureDiagnostic)] + internal static bool IsHandoffFunctionName(string name) + { + return name.StartsWith(HandoffWorkflowBuilder.FunctionPrefix, StringComparison.Ordinal); + } + + public IEnumerable FilterMessages(IEnumerable messages) + { + if (this._filteringBehavior == HandoffToolCallFilteringBehavior.None) + { + return messages; + } + + Dictionary filteringCandidates = new(); + List filteredMessages = []; + HashSet messagesToRemove = []; + + bool filterHandoffOnly = this._filteringBehavior == HandoffToolCallFilteringBehavior.HandoffOnly; + foreach (ChatMessage unfilteredMessage in messages) + { + ChatMessage filteredMessage = unfilteredMessage.Clone(); + + // .Clone() is shallow, so we cannot modify the contents of the cloned message in place. + List contents = []; + contents.Capacity = unfilteredMessage.Contents?.Count ?? 0; + filteredMessage.Contents = contents; + + // Because this runs after the role changes from assistant to user for the target agent, we cannot rely on tool calls + // originating only from messages with the Assistant role. Instead, we need to inspect the contents of all non-Tool (result) + // messages for FunctionCallContent.
+ if (unfilteredMessage.Role != ChatRole.Tool) + { + for (int i = 0; i < unfilteredMessage.Contents!.Count; i++) + { + AIContent content = unfilteredMessage.Contents[i]; + if (content is not FunctionCallContent fcc || (filterHandoffOnly && !IsHandoffFunctionName(fcc.Name))) + { + filteredMessage.Contents.Add(content); + + // Track non-handoff function calls so their tool results are preserved in HandoffOnly mode + if (filterHandoffOnly && content is FunctionCallContent nonHandoffFcc) + { + filteringCandidates[nonHandoffFcc.CallId] = new FilterCandidateState(nonHandoffFcc.CallId) + { + IsHandoffFunction = false, + }; + } + } + else if (filterHandoffOnly) + { + if (!filteringCandidates.TryGetValue(fcc.CallId, out FilterCandidateState? candidateState)) + { + filteringCandidates[fcc.CallId] = new FilterCandidateState(fcc.CallId) + { + IsHandoffFunction = true, + }; + } + else + { + candidateState.IsHandoffFunction = true; + (int messageIndex, int contentIndex) = candidateState.FunctionCallResultLocation!.Value; + ChatMessage messageToFilter = filteredMessages[messageIndex]; + messageToFilter.Contents.RemoveAt(contentIndex); + if (messageToFilter.Contents.Count == 0) + { + messagesToRemove.Add(messageIndex); + } + } + } + else + { + // All mode: strip all FunctionCallContent + } + } + } + else + { + if (!filterHandoffOnly) + { + continue; + } + + for (int i = 0; i < unfilteredMessage.Contents!.Count; i++) + { + AIContent content = unfilteredMessage.Contents[i]; + if (content is not FunctionResultContent frc + || (filteringCandidates.TryGetValue(frc.CallId, out FilterCandidateState? candidateState) + && candidateState.IsHandoffFunction is false)) + { + // Either this is not a function result content, so we should let it through, or it is a FRC that + // we know is not related to a handoff call. In either case, we should include it. 
+ filteredMessage.Contents.Add(content); + } + else if (candidateState is null) + { + // We haven't seen the corresponding function call yet, so add it as a candidate to be filtered later + filteringCandidates[frc.CallId] = new FilterCandidateState(frc.CallId) + { + FunctionCallResultLocation = (filteredMessages.Count, filteredMessage.Contents.Count), + }; + } + // else we have seen the corresponding function call and it is a handoff, so we should filter it out. + } + } + + if (filteredMessage.Contents.Count > 0) + { + filteredMessages.Add(filteredMessage); + } + } + + return filteredMessages.Where((_, index) => !messagesToRemove.Contains(index)); + } + + private class FilterCandidateState(string callId) + { + public (int MessageIndex, int ContentIndex)? FunctionCallResultLocation { get; set; } + + public string CallId => callId; + + public bool? IsHandoffFunction { get; set; } + } +} diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffStartExecutor.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffStartExecutor.cs index 063f73bb6f..223517c35f 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffStartExecutor.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffStartExecutor.cs @@ -9,8 +9,23 @@ namespace Microsoft.Agents.AI.Workflows.Specialized; internal static class HandoffConstants { + internal const string HandoffOrchestrationSharedScope = "HandoffOrchestration"; + internal const string PreviousAgentTrackerKey = "LastAgentId"; - internal const string PreviousAgentTrackerScope = "HandoffOrchestration"; + internal const string PreviousAgentTrackerScope = HandoffOrchestrationSharedScope; + + internal const string MultiPartyConversationKey = "MultiPartyConversation"; + internal const string MultiPartyConversationScope = HandoffOrchestrationSharedScope; + + internal const string HandoffSharedStateKey = "SharedState"; + internal const string HandoffSharedStateScope = 
HandoffOrchestrationSharedScope; +} + +internal sealed class HandoffSharedState +{ + public MultiPartyConversation Conversation { get; } = new(); + + public string? PreviousAgentId { get; set; } } /// Executor used at the start of a handoffs workflow to accumulate messages and emit them as HandoffState upon receiving a turn token. @@ -29,23 +44,25 @@ internal sealed class HandoffStartExecutor(bool returnToPrevious) : ChatProtocol protected override ValueTask TakeTurnAsync(List messages, IWorkflowContext context, bool? emitEvents, CancellationToken cancellationToken = default) { - if (returnToPrevious) - { - return context.InvokeWithStateAsync( - async (string? previousAgentId, IWorkflowContext context, CancellationToken cancellationToken) => - { - HandoffState handoffState = new(new(emitEvents), null, messages, previousAgentId); - await context.SendMessageAsync(handoffState, cancellationToken).ConfigureAwait(false); + return context.InvokeWithStateAsync( + async (HandoffSharedState? sharedState, IWorkflowContext context, CancellationToken cancellationToken) => + { + sharedState ??= new HandoffSharedState(); + sharedState.Conversation.AddMessages(messages); - return previousAgentId; - }, - HandoffConstants.PreviousAgentTrackerKey, - HandoffConstants.PreviousAgentTrackerScope, - cancellationToken); - } + string? previousAgentId = sharedState.PreviousAgentId; - HandoffState handoff = new(new(emitEvents), null, messages); - return context.SendMessageAsync(handoff, cancellationToken); + // If we are configured to return to the previous agent, include the previous agent id in the handoff state. + // If there was no previousAgent, it will still be null. + HandoffState turnState = new(new(emitEvents), null, returnToPrevious ? 
previousAgentId : null); + + await context.SendMessageAsync(turnState, cancellationToken).ConfigureAwait(false); + + return sharedState; + }, + HandoffConstants.HandoffSharedStateKey, + HandoffConstants.HandoffSharedStateScope, + cancellationToken); } public new ValueTask ResetAsync() => base.ResetAsync(); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffState.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffState.cs index 644bc7df0e..24cf788cb8 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffState.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/HandoffState.cs @@ -1,12 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. -using System.Collections.Generic; -using Microsoft.Extensions.AI; - namespace Microsoft.Agents.AI.Workflows.Specialized; internal sealed record class HandoffState( TurnToken TurnToken, string? RequestedHandoffTargetAgentId, - List Messages, string? PreviousAgentId = null); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/MultiPartyConversation.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/MultiPartyConversation.cs new file mode 100644 index 0000000000..4d59184f0d --- /dev/null +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Specialized/MultiPartyConversation.cs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.Extensions.AI; + +namespace Microsoft.Agents.AI.Workflows.Specialized; + +internal sealed class MultiPartyConversation +{ + private readonly List _history = []; + private readonly object _mutex = new(); + + public List CloneAllMessages() + { + lock (this._mutex) + { + return this._history.ToList(); + } + } + + public (ChatMessage[], int) CollectNewMessages(int bookmark) + { + lock (this._mutex) + { + int count = this._history.Count - bookmark; + if (count < 0) + { + throw new InvalidOperationException($"Bookmark value too large: {bookmark} vs count={count}"); + } + + return (this._history.Skip(bookmark).ToArray(), this.CurrentBookmark); + } + } + + private int CurrentBookmark => this._history.Count; + + public int AddMessages(IEnumerable messages) + { + lock (this._mutex) + { + this._history.AddRange(messages); + return this.CurrentBookmark; + } + } + + public int AddMessage(ChatMessage message) + { + lock (this._mutex) + { + this._history.Add(message); + return this.CurrentBookmark; + } + } +} diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowHostingExtensions.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowHostingExtensions.cs index 281d0694ac..e91531513d 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowHostingExtensions.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowHostingExtensions.cs @@ -41,7 +41,7 @@ public static class WorkflowHostingExtensions { Dictionary parameters = new() { - { "data", request.Data} + { "data", request.Data } }; return new FunctionCallContent(request.RequestId, request.PortInfo.PortId, parameters); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowSession.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowSession.cs index c1f81f0ecf..d015ecdcee 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowSession.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowSession.cs @@ -247,7 
+247,7 @@ internal sealed class WorkflowSession : AgentSession hasMatchedResponseForStartExecutor |= string.Equals(responseExecutorId, this._workflow.StartExecutorId, StringComparison.Ordinal); } - AIContent normalizedResponseContent = NormalizeResponseContentForDelivery(content, pendingRequest); + object normalizedResponseContent = NormalizeResponseContentForDelivery(content, pendingRequest); externalResponses.Add((pendingRequest.CreateResponse(normalizedResponseContent), pendingRequest.RequestId)); (matchedContentIds ??= new(StringComparer.Ordinal)).Add(contentId); } @@ -303,14 +303,35 @@ internal sealed class WorkflowSession : AgentSession /// /// Rewrites workflow-facing response content back to the original agent-owned content ID. /// - private static AIContent NormalizeResponseContentForDelivery(AIContent content, ExternalRequest request) => content switch + private static object NormalizeResponseContentForDelivery(AIContent content, ExternalRequest request) { - FunctionResultContent functionResultContent when request.TryGetDataAs(out FunctionCallContent? functionCallContent) - => CloneFunctionResultContent(functionResultContent, functionCallContent.CallId), - ToolApprovalResponseContent toolApprovalResponseContent when request.TryGetDataAs(out ToolApprovalRequestContent? toolApprovalRequestContent) - => CloneToolApprovalResponseContent(toolApprovalResponseContent, toolApprovalRequestContent.RequestId), - _ => content, - }; + switch (content) + { + // If we got a FRC, and were expecting a FRC (because the request started out as a FCC, rather than getting converted to + // one at the WorkflowSession boundary), clone it and send it in. + case FunctionResultContent functionResultContent when request.TryGetDataAs(out FunctionCallContent?
functionCallContent): + return CloneFunctionResultContent(functionResultContent, functionCallContent.CallId); + case FunctionResultContent functionResultContent when !request.PortInfo.ResponseType.IsMatchPolymorphic(typeof(FunctionResultContent)): + { + object? result = functionResultContent.Result; + if (result != null) + { + if (request.PortInfo.ResponseType.IsMatchPolymorphic(result.GetType()) || result is PortableValue) + { + return result; + } + + throw new InvalidOperationException($"Unexpected result type in FunctionResultContent {result.GetType()}; expecting {request.PortInfo.ResponseType}"); + } + + throw new NotSupportedException($"Null result is not supported when using RequestPort with non-AIContent-typed requests. {functionResultContent}"); + } + case ToolApprovalResponseContent toolApprovalResponseContent when request.TryGetDataAs(out ToolApprovalRequestContent? toolApprovalRequestContent): + return CloneToolApprovalResponseContent(toolApprovalResponseContent, toolApprovalRequestContent.RequestId); + default: + return content; + } + } /// /// Gets the workflow-facing request ID from response content types. diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentEntityInfoTests.cs b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentEntityInfoTests.cs new file mode 100644 index 0000000000..84273d6891 --- /dev/null +++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentEntityInfoTests.cs @@ -0,0 +1,184 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Aspire.Hosting.AgentFramework.DevUI.UnitTests; + +/// +/// Unit tests for the record. +/// +public class AgentEntityInfoTests +{ + #region Constructor Tests + + /// + /// Verifies that the Id property is set from the constructor parameter. 
+ /// + [Fact] + public void Constructor_WithId_SetsIdProperty() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent"); + + // Assert + Assert.Equal("test-agent", info.Id); + } + + /// + /// Verifies that the Description property is set when provided. + /// + [Fact] + public void Constructor_WithDescription_SetsDescriptionProperty() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent", "A test agent"); + + // Assert + Assert.Equal("A test agent", info.Description); + } + + /// + /// Verifies that the Description property is null when not provided. + /// + [Fact] + public void Constructor_WithoutDescription_DescriptionIsNull() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent"); + + // Assert + Assert.Null(info.Description); + } + + #endregion + + #region Default Value Tests + + /// + /// Verifies that Name defaults to the Id value when not explicitly set. + /// + [Fact] + public void Name_NotSet_DefaultsToId() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent"); + + // Assert + Assert.Equal("test-agent", info.Name); + } + + /// + /// Verifies that Name can be overridden with a custom value. + /// + [Fact] + public void Name_Set_ReturnsCustomValue() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent") { Name = "Custom Name" }; + + // Assert + Assert.Equal("Custom Name", info.Name); + } + + /// + /// Verifies that Type defaults to "agent". + /// + [Fact] + public void Type_NotSet_DefaultsToAgent() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent"); + + // Assert + Assert.Equal("agent", info.Type); + } + + /// + /// Verifies that Type can be overridden with a custom value. + /// + [Fact] + public void Type_Set_ReturnsCustomValue() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent") { Type = "workflow" }; + + // Assert + Assert.Equal("workflow", info.Type); + } + + /// + /// Verifies that Framework defaults to "agent_framework". 
+ /// + [Fact] + public void Framework_NotSet_DefaultsToAgentFramework() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent"); + + // Assert + Assert.Equal("agent_framework", info.Framework); + } + + /// + /// Verifies that Framework can be overridden with a custom value. + /// + [Fact] + public void Framework_Set_ReturnsCustomValue() + { + // Arrange & Act + var info = new AgentEntityInfo("test-agent") { Framework = "custom_framework" }; + + // Assert + Assert.Equal("custom_framework", info.Framework); + } + + #endregion + + #region Record Equality Tests + + /// + /// Verifies that two AgentEntityInfo records with identical values are equal. + /// + [Fact] + public void Equality_SameValues_AreEqual() + { + // Arrange + var info1 = new AgentEntityInfo("agent", "description"); + var info2 = new AgentEntityInfo("agent", "description"); + + // Assert + Assert.Equal(info1, info2); + } + + /// + /// Verifies that two AgentEntityInfo records with different Ids are not equal. + /// + [Fact] + public void Equality_DifferentIds_AreNotEqual() + { + // Arrange + var info1 = new AgentEntityInfo("agent1"); + var info2 = new AgentEntityInfo("agent2"); + + // Assert + Assert.NotEqual(info1, info2); + } + + /// + /// Verifies that with-expression creates a modified copy. 
+    ///
+    [Fact]
+    public void WithExpression_ModifiesProperty_CreatesNewInstance()
+    {
+        // Arrange
+        var original = new AgentEntityInfo("agent", "Original description");
+
+        // Act
+        var modified = original with { Description = "Modified description" };
+
+        // Assert - the source record is untouched and only Description differs on the copy
+        Assert.Equal("Original description", original.Description);
+        Assert.Equal("Modified description", modified.Description);
+        Assert.Equal(original.Id, modified.Id);
+    }
+
+    #endregion
+}
diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentFrameworkBuilderExtensionsTests.cs b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentFrameworkBuilderExtensionsTests.cs
new file mode 100644
index 0000000000..21699e3d64
--- /dev/null
+++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentFrameworkBuilderExtensionsTests.cs
@@ -0,0 +1,567 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Linq;
+using Aspire.Hosting.ApplicationModel;
+using Moq;
+
+namespace Aspire.Hosting.AgentFramework.DevUI.UnitTests;
+
+///
+/// Unit tests for the <see cref="AgentFrameworkBuilderExtensions"/> class.
+///
+public class AgentFrameworkBuilderExtensionsTests
+{
+    #region AddDevUI Validation Tests
+
+    ///
+    /// Verifies that AddDevUI throws ArgumentNullException when builder is null.
+    ///
+    [Fact]
+    public void AddDevUI_NullBuilder_ThrowsArgumentNullException()
+    {
+        // Act & Assert - the typed Throws overload is required to read ParamName
+        var exception = Assert.Throws<ArgumentNullException>(
+            () => AgentFrameworkBuilderExtensions.AddDevUI(null!, "devui"));
+        Assert.Equal("builder", exception.ParamName);
+    }
+
+    ///
+    /// Verifies that AddDevUI throws ArgumentNullException when name is null.
+    ///
+    [Fact]
+    public void AddDevUI_NullName_ThrowsArgumentNullException()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+
+        // Act & Assert
+        var exception = Assert.Throws<ArgumentNullException>(
+            () => builder.AddDevUI(null!));
+        Assert.Equal("name", exception.ParamName);
+    }
+
+    ///
+    /// Verifies that AddDevUI creates a resource with the specified name.
+    ///
+    [Fact]
+    public void AddDevUI_ValidName_CreatesResourceWithName()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+
+        // Act
+        var resourceBuilder = builder.AddDevUI("my-devui");
+
+        // Assert
+        Assert.Equal("my-devui", resourceBuilder.Resource.Name);
+    }
+
+    ///
+    /// Verifies that AddDevUI creates a DevUIResource.
+    ///
+    [Fact]
+    public void AddDevUI_ReturnsDevUIResourceBuilder()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+
+        // Act
+        var resourceBuilder = builder.AddDevUI("devui");
+
+        // Assert - exact type match, per the test's stated intent
+        Assert.IsType<DevUIResource>(resourceBuilder.Resource);
+    }
+
+    ///
+    /// Verifies that AddDevUI with port configures the endpoint.
+    ///
+    [Fact]
+    public void AddDevUI_WithPort_ConfiguresEndpointWithPort()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+
+        // Act
+        var resourceBuilder = builder.AddDevUI("devui", port: 8090);
+
+        // Assert - look up the endpoint annotation named "http"
+        var endpoint = resourceBuilder.Resource.Annotations
+            .OfType<EndpointAnnotation>()
+            .FirstOrDefault(e => e.Name == "http");
+        Assert.NotNull(endpoint);
+        Assert.Equal(8090, endpoint.Port);
+    }
+
+    ///
+    /// Verifies that AddDevUI without port leaves port as null for dynamic allocation.
+    ///
+    [Fact]
+    public void AddDevUI_WithoutPort_EndpointHasDynamicPort()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+
+        // Act
+        var resourceBuilder = builder.AddDevUI("devui");
+
+        // Assert - look up the endpoint annotation named "http"
+        var endpoint = resourceBuilder.Resource.Annotations
+            .OfType<EndpointAnnotation>()
+            .FirstOrDefault(e => e.Name == "http");
+        Assert.NotNull(endpoint);
+        Assert.Null(endpoint.Port);
+    }
+
+    #endregion
+
+    #region WithAgentService Validation Tests
+
+    ///
+    /// Verifies that WithAgentService throws ArgumentNullException when builder is null.
+    ///
+    [Fact]
+    public void WithAgentService_NullBuilder_ThrowsArgumentNullException()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var mockAgentService = CreateMockAgentServiceBuilder(appBuilder, "agent-service");
+
+        // Act & Assert - called as a static method so a null receiver can be passed
+        var exception = Assert.Throws<ArgumentNullException>(
+            () => AgentFrameworkBuilderExtensions.WithAgentService(null!, mockAgentService));
+        Assert.Equal("builder", exception.ParamName);
+    }
+
+    ///
+    /// Verifies that WithAgentService throws ArgumentNullException when agentService is null.
+    ///
+    [Fact]
+    public void WithAgentService_NullAgentService_ThrowsArgumentNullException()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+
+        // Act & Assert
+        var exception = Assert.Throws<ArgumentNullException>(
+            () => devuiBuilder.WithAgentService(null!));
+        Assert.Equal("agentService", exception.ParamName);
+    }
+
+    #endregion
+
+    #region WithAgentService Annotation Tests
+
+    ///
+    /// Verifies that WithAgentService adds an AgentServiceAnnotation to the resource.
+    ///
+    [Fact]
+    public void WithAgentService_ValidService_AddsAnnotation()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devuiBuilder.WithAgentService(agentService);
+
+        // Assert - the annotation references the very resource that was registered
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .FirstOrDefault();
+        Assert.NotNull(annotation);
+        Assert.Same(agentService.Resource, annotation.AgentService);
+    }
+
+    ///
+    /// Verifies that WithAgentService defaults to agent name being the resource name.
+    ///
+    [Fact]
+    public void WithAgentService_NoAgents_DefaultsToResourceNameAsAgent()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devuiBuilder.WithAgentService(agentService);
+
+        // Assert - exactly one agent, whose id is the service resource name
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        Assert.Single(annotation.Agents);
+        Assert.Equal("writer-agent", annotation.Agents[0].Id);
+    }
+
+    ///
+    /// Verifies that WithAgentService with explicit agents uses those agents.
+    ///
+    [Fact]
+    public void WithAgentService_WithAgents_UsesProvidedAgents()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "multi-agent-service");
+        var agents = new[]
+        {
+            new AgentEntityInfo("agent1", "First agent"),
+            new AgentEntityInfo("agent2", "Second agent")
+        };
+
+        // Act
+        devuiBuilder.WithAgentService(agentService, agents: agents);
+
+        // Assert - both agents are kept, in the order supplied
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        Assert.Equal(2, annotation.Agents.Count);
+        Assert.Equal("agent1", annotation.Agents[0].Id);
+        Assert.Equal("agent2", annotation.Agents[1].Id);
+    }
+
+    ///
+    /// Verifies that WithAgentService with custom prefix uses that prefix.
+    ///
+    [Fact]
+    public void WithAgentService_WithEntityIdPrefix_UsesProvidedPrefix()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devuiBuilder.WithAgentService(agentService, entityIdPrefix: "custom-prefix");
+
+        // Assert
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        Assert.Equal("custom-prefix", annotation.EntityIdPrefix);
+    }
+
+    ///
+    /// Verifies that WithAgentService without prefix leaves EntityIdPrefix null.
+    ///
+    [Fact]
+    public void WithAgentService_NoEntityIdPrefix_PrefixIsNull()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devuiBuilder.WithAgentService(agentService);
+
+        // Assert
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        Assert.Null(annotation.EntityIdPrefix);
+    }
+
+    #endregion
+
+    #region Chaining Tests
+
+    ///
+    /// Verifies that WithAgentService returns the builder for chaining.
+    ///
+    [Fact]
+    public void WithAgentService_ReturnsSameBuilder_ForChaining()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        var result = devuiBuilder.WithAgentService(agentService);
+
+        // Assert - reference equality, not just an equivalent builder
+        Assert.Same(devuiBuilder, result);
+    }
+
+    ///
+    /// Verifies that multiple WithAgentService calls can be chained.
+    ///
+    [Fact]
+    public void WithAgentService_MultipleCalls_AddsMultipleAnnotations()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var writerService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+        var editorService = CreateMockAgentServiceBuilder(appBuilder, "editor-agent");
+
+        // Act
+        devuiBuilder
+            .WithAgentService(writerService)
+            .WithAgentService(editorService);
+
+        // Assert - one annotation per registered service
+        var annotations = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .ToList();
+        Assert.Equal(2, annotations.Count);
+        Assert.Contains(annotations, a => a.AgentService.Name == "writer-agent");
+        Assert.Contains(annotations, a => a.AgentService.Name == "editor-agent");
+    }
+
+    ///
+    /// Verifies that AddDevUI returns a builder that can be chained with WithAgentService.
+    ///
+    [Fact]
+    public void AddDevUI_CanChainWithAgentService()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act - Chain AddDevUI with WithAgentService
+        var result = appBuilder.AddDevUI("devui").WithAgentService(agentService);
+
+        // Assert
+        Assert.NotNull(result);
+        var annotation = result.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .FirstOrDefault();
+        Assert.NotNull(annotation);
+    }
+
+    #endregion
+
+    #region Relationship Tests
+
+    ///
+    /// Verifies that WithAgentService creates a relationship annotation.
+    ///
+    [Fact]
+    public void WithAgentService_CreatesRelationshipAnnotation()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devuiBuilder.WithAgentService(agentService);
+
+        // Assert - the relationship is tagged with the "agent-backend" type
+        var relationship = devuiBuilder.Resource.Annotations
+            .OfType<ResourceRelationshipAnnotation>()
+            .FirstOrDefault();
+        Assert.NotNull(relationship);
+        Assert.Equal("agent-backend", relationship.Type);
+    }
+
+    ///
+    /// Verifies that multiple WithAgentService calls create multiple relationship annotations.
+    ///
+    [Fact]
+    public void WithAgentService_MultipleCalls_CreatesMultipleRelationships()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var writerService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+        var editorService = CreateMockAgentServiceBuilder(appBuilder, "editor-agent");
+
+        // Act
+        devuiBuilder
+            .WithAgentService(writerService)
+            .WithAgentService(editorService);
+
+        // Assert - one relationship per service, all of the same type
+        var relationships = devuiBuilder.Resource.Annotations
+            .OfType<ResourceRelationshipAnnotation>()
+            .ToList();
+        Assert.Equal(2, relationships.Count);
+        Assert.All(relationships, r => Assert.Equal("agent-backend", r.Type));
+    }
+
+    #endregion
+
+    #region Agent Metadata Tests
+
+    ///
+    /// Verifies that agent description is preserved when specified.
+    ///
+    [Fact]
+    public void WithAgentService_AgentWithDescription_PreservesDescription()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+        var agents = new[] { new AgentEntityInfo("writer", "Writes creative stories") };
+
+        // Act
+        devuiBuilder.WithAgentService(agentService, agents: agents);
+
+        // Assert
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        Assert.Equal("Writes creative stories", annotation.Agents[0].Description);
+    }
+
+    ///
+    /// Verifies that custom agent properties are preserved.
+    ///
+    [Fact]
+    public void WithAgentService_CustomAgentProperties_ArePreserved()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "custom-service");
+        var agents = new[]
+        {
+            new AgentEntityInfo("custom-agent")
+            {
+                Name = "Custom Display Name",
+                Type = "workflow",
+                Framework = "custom_framework"
+            }
+        };
+
+        // Act
+        devuiBuilder.WithAgentService(agentService, agents: agents);
+
+        // Assert - every overridden property survives the round trip through the annotation
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        var agent = annotation.Agents[0];
+        Assert.Equal("custom-agent", agent.Id);
+        Assert.Equal("Custom Display Name", agent.Name);
+        Assert.Equal("workflow", agent.Type);
+        Assert.Equal("custom_framework", agent.Framework);
+    }
+
+    ///
+    /// Verifies that empty agents array can be explicitly provided and is respected.
+    ///
+    [Fact]
+    public void WithAgentService_EmptyAgentsArray_UsesEmptyArray()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devuiBuilder = appBuilder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+        var emptyAgents = Array.Empty<AgentEntityInfo>();
+
+        // Act
+        devuiBuilder.WithAgentService(agentService, agents: emptyAgents);
+
+        // Assert
+        var annotation = devuiBuilder.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+        // When explicitly passing an empty array, the extension method respects it
+        // This is the expected behavior - explicit empty means "discover at runtime"
+        Assert.Empty(annotation.Agents);
+    }
+
+    #endregion
+
+    #region Edge Case Tests
+
+    ///
+    /// Verifies that AddDevUI can be called multiple times with different names.
+    ///
+    [Fact]
+    public void AddDevUI_MultipleCalls_CreatesSeparateResources()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+
+        // Act
+        var devui1 = appBuilder.AddDevUI("devui1");
+        var devui2 = appBuilder.AddDevUI("devui2");
+
+        // Assert - distinct instances, each keeping its own name
+        Assert.NotSame(devui1.Resource, devui2.Resource);
+        Assert.Equal("devui1", devui1.Resource.Name);
+        Assert.Equal("devui2", devui2.Resource.Name);
+    }
+
+    ///
+    /// Verifies that same agent service can be added to multiple DevUI resources.
+    ///
+    [Fact]
+    public void WithAgentService_SameServiceToMultipleDevUI_Works()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devui1 = appBuilder.AddDevUI("devui1");
+        var devui2 = appBuilder.AddDevUI("devui2");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "shared-agent");
+
+        // Act
+        devui1.WithAgentService(agentService);
+        devui2.WithAgentService(agentService);
+
+        // Assert - each DevUI has exactly one annotation and both point at the same service
+        var annotation1 = devui1.Resource.Annotations.OfType<AgentServiceAnnotation>().Single();
+        var annotation2 = devui2.Resource.Annotations.OfType<AgentServiceAnnotation>().Single();
+        Assert.Same(annotation1.AgentService, annotation2.AgentService);
+    }
+
+    ///
+    /// Verifies that WithAgentService works with different entity ID prefixes for the same service.
+    ///
+    [Fact]
+    public void WithAgentService_DifferentPrefixesToDifferentDevUI_Works()
+    {
+        // Arrange
+        var appBuilder = DistributedApplication.CreateBuilder();
+        var devui1 = appBuilder.AddDevUI("devui1");
+        var devui2 = appBuilder.AddDevUI("devui2");
+        var agentService = CreateMockAgentServiceBuilder(appBuilder, "writer-agent");
+
+        // Act
+        devui1.WithAgentService(agentService, entityIdPrefix: "prefix1");
+        devui2.WithAgentService(agentService, entityIdPrefix: "prefix2");
+
+        // Assert - the prefix is stored per DevUI resource, not per service
+        var annotation1 = devui1.Resource.Annotations.OfType<AgentServiceAnnotation>().Single();
+        var annotation2 = devui2.Resource.Annotations.OfType<AgentServiceAnnotation>().Single();
+        Assert.Equal("prefix1", annotation1.EntityIdPrefix);
+        Assert.Equal("prefix2", annotation2.EntityIdPrefix);
+    }
+
+    #endregion
+
+    #region Helper Methods
+
+    ///
+    /// Creates a mock agent service builder for testing.
+    /// Uses a minimal resource implementation that satisfies IResourceWithEndpoints.
+    ///
+    private static IResourceBuilder<IResourceWithEndpoints> CreateMockAgentServiceBuilder(
+        IDistributedApplicationBuilder appBuilder,
+        string name)
+    {
+        // Create a mock resource that implements IResourceWithEndpoints
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns(name);
+        mockResource.Setup(r => r.Annotations).Returns(new ResourceAnnotationCollection());
+
+        var mockBuilder = new Mock<IResourceBuilder<IResourceWithEndpoints>>();
+        mockBuilder.Setup(b => b.Resource).Returns(mockResource.Object);
+        mockBuilder.Setup(b => b.ApplicationBuilder).Returns(appBuilder);
+
+        return mockBuilder.Object;
+    }
+
+    #endregion
+}
diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentServiceAnnotationTests.cs b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentServiceAnnotationTests.cs
new file mode 100644
index 0000000000..0e297c56bf
--- /dev/null
+++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/AgentServiceAnnotationTests.cs
@@ -0,0 +1,167 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Aspire.Hosting.ApplicationModel;
+using Moq;
+
+namespace Aspire.Hosting.AgentFramework.DevUI.UnitTests;
+
+///
+/// Unit tests for the <see cref="AgentServiceAnnotation"/> class.
+///
+public class AgentServiceAnnotationTests
+{
+    #region Constructor Validation Tests
+
+    ///
+    /// Verifies that passing null for agentService throws ArgumentNullException.
+    ///
+    [Fact]
+    public void Constructor_NullAgentService_ThrowsArgumentNullException()
+    {
+        // Act & Assert
+        Assert.Throws<ArgumentNullException>(() => new AgentServiceAnnotation(null!));
+    }
+
+    ///
+    /// Verifies that a valid agentService can be used to create the annotation.
+    ///
+    [Fact]
+    public void Constructor_ValidAgentService_CreatesAnnotation()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("test-service");
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object);
+
+        // Assert
+        Assert.NotNull(annotation);
+        Assert.Same(mockResource.Object, annotation.AgentService);
+    }
+
+    #endregion
+
+    #region Property Tests
+
+    ///
+    /// Verifies that AgentService property returns the value passed to constructor.
+    ///
+    [Fact]
+    public void AgentService_ReturnsConstructorValue()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("my-service");
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object);
+
+        // Assert - reference equality with the constructor argument
+        Assert.Same(mockResource.Object, annotation.AgentService);
+    }
+
+    ///
+    /// Verifies that EntityIdPrefix returns null when not specified.
+    ///
+    [Fact]
+    public void EntityIdPrefix_NotSpecified_ReturnsNull()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("test-service");
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object);
+
+        // Assert
+        Assert.Null(annotation.EntityIdPrefix);
+    }
+
+    ///
+    /// Verifies that EntityIdPrefix returns the value passed to constructor.
+    ///
+    [Fact]
+    public void EntityIdPrefix_Specified_ReturnsValue()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("test-service");
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object, entityIdPrefix: "custom-prefix");
+
+        // Assert
+        Assert.Equal("custom-prefix", annotation.EntityIdPrefix);
+    }
+
+    ///
+    /// Verifies that Agents returns empty collection when not specified.
+    ///
+    [Fact]
+    public void Agents_NotSpecified_ReturnsEmptyCollection()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("test-service");
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object);
+
+        // Assert - an empty, non-null collection rather than null
+        Assert.NotNull(annotation.Agents);
+        Assert.Empty(annotation.Agents);
+    }
+
+    ///
+    /// Verifies that Agents returns the list passed to constructor.
+    ///
+    [Fact]
+    public void Agents_Specified_ReturnsValue()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("test-service");
+        var agents = new[] { new AgentEntityInfo("agent1"), new AgentEntityInfo("agent2") };
+
+        // Act
+        var annotation = new AgentServiceAnnotation(mockResource.Object, agents: agents);
+
+        // Assert - both agents are kept, in the order supplied
+        Assert.Equal(2, annotation.Agents.Count);
+        Assert.Equal("agent1", annotation.Agents[0].Id);
+        Assert.Equal("agent2", annotation.Agents[1].Id);
+    }
+
+    #endregion
+
+    #region Full Constructor Tests
+
+    ///
+    /// Verifies that all constructor parameters are correctly stored.
+    ///
+    [Fact]
+    public void Constructor_AllParameters_SetsAllProperties()
+    {
+        // Arrange
+        var mockResource = new Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns("full-service");
+        var agents = new[] { new AgentEntityInfo("writer", "Writes stories") };
+
+        // Act
+        var annotation = new AgentServiceAnnotation(
+            mockResource.Object,
+            entityIdPrefix: "writer-backend",
+            agents: agents);
+
+        // Assert
+        Assert.Same(mockResource.Object, annotation.AgentService);
+        Assert.Equal("writer-backend", annotation.EntityIdPrefix);
+        Assert.Single(annotation.Agents);
+        Assert.Equal("writer", annotation.Agents[0].Id);
+        Assert.Equal("Writes stories", annotation.Agents[0].Description);
+    }
+
+    #endregion
+}
diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/Aspire.Hosting.AgentFramework.DevUI.UnitTests.csproj b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/Aspire.Hosting.AgentFramework.DevUI.UnitTests.csproj
new file mode 100644
index 0000000000..9c1f22aca3
--- /dev/null
+++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/Aspire.Hosting.AgentFramework.DevUI.UnitTests.csproj
@@ -0,0 +1,19 @@
+
+
+    $(TargetFrameworksCore)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIAggregatorHostedServiceTests.cs b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIAggregatorHostedServiceTests.cs
new file mode 100644
index 0000000000..28104aa67a
--- /dev/null
+++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIAggregatorHostedServiceTests.cs
@@ -0,0 +1,298 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Linq;
+using Aspire.Hosting.ApplicationModel;
+using Microsoft.AspNetCore.Http;
+
+namespace Aspire.Hosting.AgentFramework.DevUI.UnitTests;
+
+///
+/// Unit tests for the <see cref="DevUIAggregatorHostedService"/> class.
+///
+public class DevUIAggregatorHostedServiceTests
+{
+    #region RewriteAgentIdInQueryString Tests
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString returns empty string when query string has no value.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_EmptyQueryString_ReturnsEmptyString()
+    {
+        // Arrange
+        var queryString = QueryString.Empty;
+
+        // Act
+        var result = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(queryString, "writer");
+
+        // Assert
+        Assert.Equal(string.Empty, result);
+    }
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString rewrites agent_id to the un-prefixed value.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_WithPrefixedAgentId_RewritesToUnprefixed()
+    {
+        // Arrange
+        var queryString = new QueryString("?agent_id=writer-agent%2Fwriter");
+
+        // Act
+        var result = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(queryString, "writer");
+
+        // Assert
+        Assert.Contains("agent_id=writer", result);
+        Assert.DoesNotContain("writer-agent", result);
+    }
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString preserves other query parameters.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_WithOtherParams_PreservesOtherParams()
+    {
+        // Arrange
+        var queryString = new QueryString("?agent_id=writer-agent%2Fwriter&conversation_id=123&page=5");
+
+        // Act
+        var result = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(queryString, "writer");
+
+        // Assert - anchor the value: the original "agent_id=writer-agent%2Fwriter" also contains "agent_id=writer"
+        Assert.Matches("agent_id=writer(&|$)", result);
+        Assert.Contains("conversation_id=123", result);
+        Assert.Contains("page=5", result);
+    }
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString works when agent_id is not the first parameter.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_AgentIdNotFirst_StillRewrites()
+    {
+        // Incoming query with agent_id in the middle position
+        var incomingQuery = new QueryString("?page=1&agent_id=editor-agent%2Feditor&limit=10");
+
+        // Rewrite to the bare agent id
+        var rewritten = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(incomingQuery, "editor");
+
+        // The bare id is present and the prefix is gone
+        Assert.Contains("agent_id=editor", rewritten);
+        Assert.DoesNotContain("editor-agent", rewritten);
+    }
+
+    ///
+    /// Checks the rewrite when the actual agent ID contains a hyphen.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_SpecialCharsInAgentId_UrlEncodesCorrectly()
+    {
+        // Incoming query whose agent_id carries an encoded "prefix/" segment
+        var incomingQuery = new QueryString("?agent_id=prefix%2Fmy-agent");
+
+        // Rewrite to the bare agent id
+        var rewritten = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(incomingQuery, "my-agent");
+
+        // The bare id appears directly after the key
+        // (a hyphen needs no percent-encoding, so the literal form is expected)
+        Assert.Contains("agent_id=my-agent", rewritten);
+    }
+
+    ///
+    /// Checks that an agent_id without any prefix is simply replaced by the new value.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_NoPrefix_SetsDirectly()
+    {
+        // Incoming query with an un-prefixed agent_id
+        var incomingQuery = new QueryString("?agent_id=simple");
+
+        // Rewrite to a different id
+        var rewritten = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(incomingQuery, "new-value");
+
+        // The new value replaces the old one entirely
+        Assert.Contains("agent_id=new-value", rewritten);
+        Assert.DoesNotContain("simple", rewritten);
+    }
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString adds agent_id even if not originally present.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_NoAgentId_AddsAgentId()
+    {
+        // Arrange
+        var queryString = new QueryString("?page=1&limit=10");
+
+        // Act
+        var result = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(queryString, "writer");
+
+        // Assert - agent_id is added and the existing parameters survive
+        Assert.Contains("agent_id=writer", result);
+        Assert.Contains("page=1", result);
+        Assert.Contains("limit=10", result);
+    }
+
+    ///
+    /// Verifies that RewriteAgentIdInQueryString returns proper format starting with ?.
+    ///
+    [Fact]
+    public void RewriteAgentIdInQueryString_ValidQuery_ReturnsQueryStringFormat()
+    {
+        // Arrange
+        var queryString = new QueryString("?agent_id=test");
+
+        // Act
+        var result = DevUIAggregatorHostedService.RewriteAgentIdInQueryString(queryString, "writer");
+
+        // Assert
+        Assert.StartsWith("?", result);
+    }
+
+    #endregion
+
+    #region Backend Resolution Behavior Tests
+
+    ///
+    /// Verifies that ResolveBackends returns empty dictionary when no annotations are present.
+    /// These tests verify the expected behavior of the aggregator via the DevUI resource annotations.
+    ///
+    [Fact]
+    public void DevUIResource_NoAnnotations_ResolveBackendsReturnsEmpty()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+        var devui = builder.AddDevUI("devui");
+
+        // Assert - no AgentServiceAnnotation means no backends
+        var annotations = devui.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .ToList();
+
+        Assert.Empty(annotations);
+    }
+
+    ///
+    /// Verifies that WithAgentService adds proper annotations for backend resolution.
+    ///
+    [Fact]
+    public void WithAgentService_AddsAnnotation_ForBackendResolution()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+        var devui = builder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(builder, "writer-agent");
+
+        // Act
+        devui.WithAgentService(agentService);
+
+        // Assert - the annotation carries the registered service by name
+        var annotation = devui.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .FirstOrDefault();
+
+        Assert.NotNull(annotation);
+        Assert.Equal("writer-agent", annotation.AgentService.Name);
+    }
+
+    ///
+    /// Verifies that custom EntityIdPrefix is properly stored in the annotation.
+    ///
+    [Fact]
+    public void WithAgentService_CustomPrefix_StoresInAnnotation()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+        var devui = builder.AddDevUI("devui");
+        var agentService = CreateMockAgentServiceBuilder(builder, "writer-agent");
+
+        // Act
+        devui.WithAgentService(agentService, entityIdPrefix: "custom-writer");
+
+        // Assert
+        var annotation = devui.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .First();
+
+        Assert.Equal("custom-writer", annotation.EntityIdPrefix);
+    }
+
+    ///
+    /// Verifies that multiple agent services create multiple annotations for backend resolution.
+    ///
+    [Fact]
+    public void WithAgentService_MultipleServices_CreatesMultipleAnnotations()
+    {
+        // Arrange
+        var builder = DistributedApplication.CreateBuilder();
+        var devui = builder.AddDevUI("devui");
+        var writerService = CreateMockAgentServiceBuilder(builder, "writer-agent");
+        var editorService = CreateMockAgentServiceBuilder(builder, "editor-agent");
+
+        // Act
+        devui.WithAgentService(writerService);
+        devui.WithAgentService(editorService);
+
+        // Assert - one annotation per registered service
+        var annotations = devui.Resource.Annotations
+            .OfType<AgentServiceAnnotation>()
+            .ToList();
+
+        Assert.Equal(2, annotations.Count);
+        Assert.Contains(annotations, a => a.AgentService.Name == "writer-agent");
+        Assert.Contains(annotations, a => a.AgentService.Name == "editor-agent");
+    }
+
+    #endregion
+
+    #region Entity ID Parsing Tests
+
+    ///
+    /// Verifies the expected format for prefixed entity IDs in the aggregator.
+    ///
+    [Theory]
+    [InlineData("writer-agent/writer", "writer-agent", "writer")]
+    [InlineData("editor-agent/editor", "editor-agent", "editor")]
+    [InlineData("custom/my-agent", "custom", "my-agent")]
+    [InlineData("prefix/sub/path", "prefix", "sub/path")]
+    public void PrefixedEntityId_Format_ExtractsCorrectly(string prefixedId, string expectedPrefix, string expectedRest)
+    {
+        // This test documents the expected format for prefixed entity IDs
+        // The aggregator uses "prefix/entityId" format where:
+        // - prefix is typically the resource name or custom prefix
+        // - entityId is the original entity identifier from the backend
+
+        var slashIndex = prefixedId.IndexOf('/');
+        var prefix = prefixedId[..slashIndex];
+        var rest = prefixedId[(slashIndex + 1)..];
+
+        Assert.Equal(expectedPrefix, prefix);
+        Assert.Equal(expectedRest, rest);
+    }
+
+    #endregion
+
+    #region Helper Methods
+
+    ///
+    /// Creates a mock agent service builder for testing.
+    /// Uses a minimal resource implementation that satisfies IResourceWithEndpoints.
+    ///
+    private static IResourceBuilder<IResourceWithEndpoints> CreateMockAgentServiceBuilder(
+        IDistributedApplicationBuilder appBuilder,
+        string name)
+    {
+        // Create a mock resource that implements IResourceWithEndpoints
+        var mockResource = new Moq.Mock<IResourceWithEndpoints>();
+        mockResource.Setup(r => r.Name).Returns(name);
+        mockResource.Setup(r => r.Annotations).Returns(new ResourceAnnotationCollection());
+
+        var mockBuilder = new Moq.Mock<IResourceBuilder<IResourceWithEndpoints>>();
+        mockBuilder.Setup(b => b.Resource).Returns(mockResource.Object);
+        mockBuilder.Setup(b => b.ApplicationBuilder).Returns(appBuilder);
+
+        return mockBuilder.Object;
+    }
+
+    #endregion
+}
diff --git a/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIResourceTests.cs b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIResourceTests.cs
new file mode 100644
index 0000000000..71409d21b0
--- /dev/null
+++ b/dotnet/tests/Aspire.Hosting.AgentFramework.DevUI.UnitTests/DevUIResourceTests.cs
@@ -0,0 +1,195 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Linq;
+using System.Net.Sockets;
+using Aspire.Hosting.ApplicationModel;
+
+namespace Aspire.Hosting.AgentFramework.DevUI.UnitTests;
+
+///
+/// Unit tests for the <see cref="DevUIResource"/> class.
+///
+public class DevUIResourceTests
+{
+    #region Constructor Tests
+
+    ///
+    /// Verifies that the resource name is correctly set.
+    ///
+    [Fact]
+    public void Constructor_WithName_SetsName()
+    {
+        // Arrange & Act
+        var resource = new DevUIResource("test-devui");
+
+        // Assert
+        Assert.Equal("test-devui", resource.Name);
+    }
+
+    ///
+    /// Verifies that the resource implements IResourceWithEndpoints.
+    ///
+    [Fact]
+    public void Resource_ImplementsIResourceWithEndpoints()
+    {
+        // Arrange & Act
+        var resource = new DevUIResource("test-devui");
+
+        // Assert
+        Assert.IsAssignableFrom<IResourceWithEndpoints>(resource);
+    }
+
+    ///
+    /// Verifies that the resource implements IResourceWithWaitSupport.
+ /// + [Fact] + public void Resource_ImplementsIResourceWithWaitSupport() + { + // Arrange & Act + var resource = new DevUIResource("test-devui"); + + // Assert + Assert.IsAssignableFrom(resource); + } + + #endregion + + #region Endpoint Annotation Tests + + /// + /// Verifies that the resource has an HTTP endpoint annotation when port is specified. + /// + [Fact] + public void Constructor_WithPort_AddsEndpointAnnotation() + { + // Arrange & Act + var resource = CreateResourceWithPort(8090); + + // Assert + var endpoint = resource.Annotations.OfType().FirstOrDefault(); + Assert.NotNull(endpoint); + Assert.Equal("http", endpoint.Name); + Assert.Equal(8090, endpoint.Port); + } + + /// + /// Verifies that the endpoint annotation has correct protocol type. + /// + [Fact] + public void EndpointAnnotation_HasTcpProtocol() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint = resource.Annotations.OfType().First(); + + // Assert + Assert.Equal(ProtocolType.Tcp, endpoint.Protocol); + } + + /// + /// Verifies that the endpoint annotation has HTTP URI scheme. + /// + [Fact] + public void EndpointAnnotation_HasHttpUriScheme() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint = resource.Annotations.OfType().First(); + + // Assert + Assert.Equal("http", endpoint.UriScheme); + } + + /// + /// Verifies that the endpoint is not proxied. + /// + [Fact] + public void EndpointAnnotation_IsNotProxied() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint = resource.Annotations.OfType().First(); + + // Assert + Assert.False(endpoint.IsProxied); + } + + /// + /// Verifies that the endpoint target host is localhost. 
+ /// + [Fact] + public void EndpointAnnotation_TargetHostIsLocalhost() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint = resource.Annotations.OfType().First(); + + // Assert + Assert.Equal("localhost", endpoint.TargetHost); + } + + /// + /// Verifies that the endpoint has no fixed port when null is passed. + /// + [Fact] + public void Constructor_WithNullPort_EndpointHasNullPort() + { + // Arrange & Act + var resource = CreateResourceWithPort(null); + + // Assert + var endpoint = resource.Annotations.OfType().FirstOrDefault(); + Assert.NotNull(endpoint); + Assert.Null(endpoint.Port); + } + + #endregion + + #region PrimaryEndpoint Tests + + /// + /// Verifies that PrimaryEndpoint returns an endpoint reference. + /// + [Fact] + public void PrimaryEndpoint_ReturnsEndpointReference() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint = resource.PrimaryEndpoint; + + // Assert + Assert.NotNull(endpoint); + Assert.Same(resource, endpoint.Resource); + } + + /// + /// Verifies that PrimaryEndpoint returns the same instance on multiple calls. + /// + [Fact] + public void PrimaryEndpoint_MultipleCalls_ReturnsSameInstance() + { + // Arrange + var resource = CreateResourceWithPort(8080); + + // Act + var endpoint1 = resource.PrimaryEndpoint; + var endpoint2 = resource.PrimaryEndpoint; + + // Assert + Assert.Same(endpoint1, endpoint2); + } + + #endregion + + private static DevUIResource CreateResourceWithPort(int? port) => new("test-devui", port); +} diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.Declarative.UnitTests/Kit/PortableValuePredicateTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.Declarative.UnitTests/Kit/PortableValuePredicateTests.cs new file mode 100644 index 0000000000..4ed50afb5a --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.Declarative.UnitTests/Kit/PortableValuePredicateTests.cs @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using FluentAssertions; +using Microsoft.Agents.AI.Workflows.Declarative.Events; +using Microsoft.Agents.AI.Workflows.Declarative.Kit; +using Microsoft.Agents.AI.Workflows.Declarative.ObjectModel; + +namespace Microsoft.Agents.AI.Workflows.Declarative.UnitTests.Kit; + +/// +/// Tests that edge predicates correctly handle PortableValue-wrapped messages, +/// which occur after checkpoint restore (JSON round-trip). +/// +public sealed class PortableValuePredicateTests +{ + #region ActionExecutorResult.ThrowIfNot + + [Fact] + public void ActionExecutorResult_ThrowIfNot_WithDirectActionExecutorResult_ReturnsResult() + { + // Arrange + ActionExecutorResult result = new("test-executor"); + + // Act + ActionExecutorResult actual = ActionExecutorResult.ThrowIfNot(result); + + // Assert + actual.Should().BeSameAs(result); + } + + [Fact] + public void ActionExecutorResult_ThrowIfNot_WithPortableValueWrappedActionExecutorResult_Unwraps() + { + // Arrange + ActionExecutorResult result = new("test-executor"); + PortableValue wrapped = new(result); + + // Act + ActionExecutorResult actual = ActionExecutorResult.ThrowIfNot(wrapped); + + // Assert + actual.ExecutorId.Should().Be("test-executor"); + } + + [Fact] + public void ActionExecutorResult_ThrowIfNot_WithNonActionExecutorResult_Throws() + { + // Arrange + object message = "not an ActionExecutorResult"; + + // Act & Assert + Assert.Throws(() => ActionExecutorResult.ThrowIfNot(message)); + } + + [Fact] + public void ActionExecutorResult_ThrowIfNot_WithNull_Throws() + { + // Act & Assert + Assert.Throws(() => ActionExecutorResult.ThrowIfNot(null)); + } + + [Fact] + public void ActionExecutorResult_ThrowIfNot_WithPortableValueWrappedNonResult_Throws() + { + // Arrange + PortableValue wrapped = new("not an ActionExecutorResult"); + + // Act & Assert + Assert.Throws(() => ActionExecutorResult.ThrowIfNot(wrapped)); + } + + #endregion + + #region InvokeAzureAgentExecutor Predicates + + [Fact] + public void 
InvokeAzureAgentExecutor_RequiresInput_WithDirectExternalInputRequest_ReturnsTrue() + { + // Arrange + ExternalInputRequest request = new("test prompt"); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresInput(request).Should().BeTrue(); + } + + [Fact] + public void InvokeAzureAgentExecutor_RequiresInput_WithPortableValueWrappedRequest_ReturnsTrue() + { + // Arrange + ExternalInputRequest request = new("test prompt"); + PortableValue wrapped = new(request); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresInput(wrapped).Should().BeTrue(); + } + + [Fact] + public void InvokeAzureAgentExecutor_RequiresInput_WithActionExecutorResult_ReturnsFalse() + { + // Arrange + ActionExecutorResult result = new("test"); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresInput(result).Should().BeFalse(); + } + + [Fact] + public void InvokeAzureAgentExecutor_RequiresNothing_WithDirectActionExecutorResult_ReturnsTrue() + { + // Arrange + ActionExecutorResult result = new("test"); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresNothing(result).Should().BeTrue(); + } + + [Fact] + public void InvokeAzureAgentExecutor_RequiresNothing_WithPortableValueWrappedResult_ReturnsTrue() + { + // Arrange + ActionExecutorResult result = new("test"); + PortableValue wrapped = new(result); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresNothing(wrapped).Should().BeTrue(); + } + + [Fact] + public void InvokeAzureAgentExecutor_RequiresNothing_WithExternalInputRequest_ReturnsFalse() + { + // Arrange + ExternalInputRequest request = new("test prompt"); + + // Act & Assert + InvokeAzureAgentExecutor.RequiresNothing(request).Should().BeFalse(); + } + + #endregion + + #region InvokeMcpToolExecutor Predicates + + [Fact] + public void InvokeMcpToolExecutor_RequiresInput_WithPortableValueWrappedRequest_ReturnsTrue() + { + // Arrange + ExternalInputRequest request = new("test prompt"); + PortableValue wrapped = new(request); + + // Act & Assert + 
InvokeMcpToolExecutor.RequiresInput(wrapped).Should().BeTrue(); + } + + [Fact] + public void InvokeMcpToolExecutor_RequiresNothing_WithPortableValueWrappedResult_ReturnsTrue() + { + // Arrange + ActionExecutorResult result = new("test"); + PortableValue wrapped = new(result); + + // Act & Assert + InvokeMcpToolExecutor.RequiresNothing(wrapped).Should().BeTrue(); + } + + #endregion + + #region QuestionExecutor.IsComplete + + [Fact] + public void QuestionExecutor_IsComplete_WithPortableValueWrappedResult_NullResult_ReturnsTrue() + { + // Arrange - result with null Result property means "complete" + ActionExecutorResult result = new("test", result: null); + PortableValue wrapped = new(result); + + // Act & Assert + QuestionExecutor.IsComplete(wrapped).Should().BeTrue(); + } + + [Fact] + public void QuestionExecutor_IsComplete_WithPortableValueWrappedResult_NonNullResult_ReturnsFalse() + { + // Arrange - result with non-null Result property means "not complete" + ActionExecutorResult result = new("test", result: true); + PortableValue wrapped = new(result); + + // Act & Assert + QuestionExecutor.IsComplete(wrapped).Should().BeFalse(); + } + + #endregion +} diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/AgentWorkflowBuilderTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/AgentWorkflowBuilderTests.cs index c857811b08..fc984a9963 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/AgentWorkflowBuilderTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/AgentWorkflowBuilderTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Runtime.CompilerServices; using System.Text; using System.Text.Json; @@ -11,7 +12,9 @@ using System.Threading; using System.Threading.Tasks; using FluentAssertions; using Microsoft.Agents.AI.Workflows.InProc; +using Microsoft.Agents.AI.Workflows.Specialized; using Microsoft.Extensions.AI; +using 
Microsoft.Extensions.Logging; #pragma warning disable SYSLIB1045 // Use GeneratedRegex #pragma warning disable RCS1186 // Use Regex instance instead of static method @@ -52,6 +55,51 @@ public class AgentWorkflowBuilderTests var noDescriptionAgent = new ChatClientAgent(new MockChatClient(delegate { return new(); })); Assert.Throws("to", () => handoffs.WithHandoff(agent, noDescriptionAgent)); + + var emptyDescriptionAgent = new MockChatClient(delegate { return new(); }).AsAIAgent(description: ""); + Assert.Throws("to", () => handoffs.WithHandoff(agent, emptyDescriptionAgent)); + + var emptyNameAgent = new MockChatClient(delegate { return new(); }).AsAIAgent(name: ""); + Assert.Throws("to", () => handoffs.WithHandoff(agent, emptyNameAgent)); + } + + private sealed class NullLogger : ILogger + { + public IDisposable? BeginScope(TState state) where TState : notnull + { + return null; + } + + public bool IsEnabled(LogLevel logLevel) + { + return false; + } + + public void Log(LogLevel logLevel, EventId eventId, TState state, Exception? exception, Func formatter) + { + } + } + + [Fact] + public void BuildHandoffs_DelegatingAIAgent_DoesNotThrow() + { + DoubleEchoAgent agent = new("agent"); + HandoffWorkflowBuilder handoffs = AgentWorkflowBuilder.CreateHandoffBuilderWith(agent); + Assert.NotNull(handoffs); + + ChatClientAgent instructionsOnlyAgent = new MockChatClient(delegate { return new(); }).AsAIAgent(instructions: "instructions"); + LoggingAgent delegatingAgent = new(instructionsOnlyAgent, new NullLogger()); + + handoffs.WithHandoff(agent, delegatingAgent); + + // get the _targets field from the HandoffWorkflowBuilder (need to use the base type) + FieldInfo field = typeof(HandoffWorkflowBuilder).BaseType!.GetField("_targets", BindingFlags.Instance | BindingFlags.NonPublic)!; + Dictionary>? 
targets = field.GetValue(handoffs) as Dictionary>; + + targets.Should().NotBeNull(); + + HandoffTarget target = targets[agent].Single(); + target.Reason.Should().Be("instructions"); } [Fact] diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/HandoffAgentExecutorTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/HandoffAgentExecutorTests.cs index 236d9ae455..70f802399d 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/HandoffAgentExecutorTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/HandoffAgentExecutorTests.cs @@ -7,6 +7,10 @@ using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using FluentAssertions; +using Microsoft.Agents.AI.Workflows.Checkpointing; +using Microsoft.Agents.AI.Workflows.Execution; +using Microsoft.Agents.AI.Workflows.InProc; +using Microsoft.Agents.AI.Workflows.Sample; using Microsoft.Agents.AI.Workflows.Specialized; using Microsoft.Extensions.AI; @@ -14,6 +18,27 @@ namespace Microsoft.Agents.AI.Workflows.UnitTests; public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase { + private static async ValueTask PrepareHandoffSharedStateAsync(TestRunContext? runContext = null, IEnumerable? messages = null) + { + runContext ??= new(); + + HandoffSharedState sharedState = new(); + + if (messages != null) + { + sharedState.Conversation.AddMessages(messages); + } + + await runContext.BindWorkflowContext(nameof(HandoffStartExecutor)) + .QueueStateUpdateAsync(HandoffConstants.HandoffSharedStateKey, + sharedState, + HandoffConstants.HandoffSharedStateScope); + + await runContext.StateManager.PublishUpdatesAsync(null); + + return runContext; + } + [Theory] [InlineData(null, null)] [InlineData(null, true)] @@ -27,7 +52,7 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase public async Task Test_HandoffAgentExecutor_EmitsStreamingUpdatesIFFConfiguredAsync(bool? executorSetting, bool? 
turnSetting) { // Arrange - TestRunContext testContext = new(); + TestRunContext testContext = await PrepareHandoffSharedStateAsync(); TestReplayAgent agent = new(TestMessages, TestAgentId, TestAgentName); HandoffAgentExecutorOptions options = new("", @@ -39,7 +64,7 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase testContext.ConfigureExecutor(executor); // Act - HandoffState message = new(new(turnSetting), null, []); + HandoffState message = new(new(turnSetting), null, null); await executor.HandleAsync(message, testContext.BindWorkflowContext(executor.Id)); // Assert @@ -55,7 +80,7 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase public async Task Test_HandoffAgentExecutor_EmitsResponseIFFConfiguredAsync(bool executorSetting) { // Arrange - TestRunContext testContext = new(); + TestRunContext testContext = await PrepareHandoffSharedStateAsync(); TestReplayAgent agent = new(TestMessages, TestAgentId, TestAgentName); HandoffAgentExecutorOptions options = new("", @@ -67,7 +92,7 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase testContext.ConfigureExecutor(executor); // Act - HandoffState message = new(new(false), null, []); + HandoffState message = new(new(false), null, null); await executor.HandleAsync(message, testContext.BindWorkflowContext(executor.Id)); // Assert @@ -75,6 +100,82 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase CheckResponseEventsAgainstTestMessages(updates, expectingResponse: executorSetting, agent.GetDescriptiveId()); } + [Fact] + public async Task Test_HandoffAgentExecutor_ComposesWithHITLSubworkflowAsync() + { + // Arrange + TestRunContext testContext = await PrepareHandoffSharedStateAsync(); + + SendsRequestExecutor challengeSender = new(); + Workflow subworkflow = new WorkflowBuilder(challengeSender) + .AddExternalRequest(challengeSender, "SendChallengeToUser") + .WithOutputFrom(challengeSender) + .Build(); + + 
InProcessExecutionEnvironment environment = InProcessExecution.Lockstep.WithCheckpointing(CheckpointManager.CreateInMemory()); + AIAgent subworkflowAgent = subworkflow.AsAIAgent(includeWorkflowOutputsInResponse: true, name: "Subworkflow", executionEnvironment: environment); + HandoffAgentExecutorOptions options = new("", + emitAgentResponseEvents: true, + emitAgentResponseUpdateEvents: true, + HandoffToolCallFilteringBehavior.None); + + HandoffAgentExecutor executor = new(subworkflowAgent, [], options); + Workflow fakeWorkflow = new(executor.Id) { ExecutorBindings = { { executor.Id, executor } } }; + EdgeMap map = new(testContext, fakeWorkflow, null); + + testContext.ConfigureExecutor(executor, map); + + // Validate that our test assumptions hold + string functionCallPortId = $"{HandoffAgentExecutor.IdFor(subworkflowAgent)}_FunctionCall"; + map.TryGetResponsePortExecutorId(functionCallPortId, out string? responsePortExecutorId).Should().BeTrue(); + responsePortExecutorId.Should().Be(executor.Id); + + // Act + HandoffState message = new(new(false), null, null); + await executor.HandleAsync(message, testContext.BindWorkflowContext(executor.Id)); + + await testContext.StateManager.PublishUpdatesAsync(null); + + // Assert + testContext.ExternalRequests.Should().HaveCount(1) + .And.ContainSingle(request => request.IsDataOfType()); + + FunctionCallContent functionCallContent = testContext.ExternalRequests.Single().Data.As()!; + object? requestData = functionCallContent.Arguments!["data"]; + + Challenge? challenge = null; + if (requestData is PortableValue pv) + { + challenge = pv.As(); + } + else + { + challenge = requestData as Challenge; + } + + if (challenge is null) + { + Assert.Fail($"Expected request data to be of type {typeof(Challenge).FullName}, but was {requestData?.GetType().FullName ?? 
"null"}"); + return; // Unreachable, but analysis cannot infer that Debug.Fail will throw/exit, and UnreachableException is not available on net472 + } + + // Act 2 + string challengeResponse = new(challenge.Value.Reverse().ToArray()); + FunctionResultContent responseContent = new(functionCallContent.CallId, new Response(challengeResponse)); + + RequestPortInfo requestPortInfo = new(new(typeof(Challenge)), new(typeof(Response)), functionCallPortId); + string requestId = $"{functionCallPortId.Length}:{functionCallPortId}:{functionCallContent.CallId}"; + DeliveryMapping? mapping = await map.PrepareDeliveryForResponseAsync(new(requestPortInfo, requestId, new(responseContent))); + + mapping!.Deliveries.Should().HaveCount(1); + + MessageDelivery delivery = mapping!.Deliveries.Single(); + + object? result = await executor.ExecuteCoreAsync(delivery.Envelope.Message, + delivery.Envelope.MessageType, + testContext.BindWorkflowContext(executor.Id)); + } + [Fact] public async Task Test_HandoffAgentExecutor_PreservesExistingInstructionsAndToolsAsync() { @@ -92,80 +193,113 @@ public class HandoffAgentExecutorTests : AIAgentHostingExecutorTestsBase HandoffTarget handoff = new(targetAgent); HandoffAgentExecutor executor = new(handoffAgent, [handoff], options); - TestWorkflowContext testContext = new(executor.Id); - HandoffState state = new(new(false), null, [], null); + TestRunContext runContext = await PrepareHandoffSharedStateAsync(); + IWorkflowContext testContext = runContext.BindWorkflowContext(executor.Id); + HandoffState state = new(new(false), null); // Act / Assert Func runStreamingAsync = async () => await executor.HandleAsync(state, testContext); await runStreamingAsync.Should().NotThrowAsync(); } +} - private sealed class OptionValidatingChatClient(string baseInstructions, string handoffInstructions, AITool baseTool) : IChatClient +internal sealed record Challenge(string Value); +internal sealed record Response(string Value); + +[SendsMessage(typeof(Challenge))] 
+internal sealed partial class SendsRequestExecutor(string? id = null) : ChatProtocolExecutor(id ?? nameof(SendsRequestExecutor), s_chatOptions) +{ + internal const string ChallengeString = "{C7A762AE-7DAA-4D9C-A647-E64E6DBC35AE}"; + private static string ResponseKey { get; } = new(ChallengeString.Reverse().ToArray()); + + private static readonly ChatProtocolExecutorOptions s_chatOptions = new() { - public void Dispose() + AutoSendTurnToken = false + }; + + protected override ValueTask TakeTurnAsync(List messages, IWorkflowContext context, bool? emitEvents, CancellationToken cancellationToken = default) + => context.SendMessageAsync(new Challenge(ChallengeString), cancellationToken); + + [MessageHandler] + public async ValueTask HandleChallengeResponseAsync(Response response, IWorkflowContext context, CancellationToken cancellationToken = default) + { + if (response.Value != ResponseKey) { + throw new InvalidOperationException($"Incorrect response received. Expected '{ResponseKey}' but got '{response.Value}'"); } - private void CheckOptions(ChatOptions? 
options) - { - options.Should().NotBeNull(); + await context.SendMessageAsync(new ChatMessage(ChatRole.Assistant, "Correct response."), cancellationToken) + .ConfigureAwait(false); - options.Instructions.Should().NotBeNullOrEmpty("Handoff orchestration should preserve and augment instructions.") - .And.Contain(baseInstructions, because: "Handoff orchestration should preserve existing instructions.") - .And.Contain(handoffInstructions, because: "Handoff orchestration should inject handoff instructions."); + await context.SendMessageAsync(new TurnToken(false), cancellationToken).ConfigureAwait(false); + } +} - options.Tools.Should().NotBeNullOrEmpty("Handoff orchestration should preserve and augment tools.") - .And.Contain(tool => tool.Name == baseTool.Name, "Handoff orchestration should preserve existing tools.") - .And.Contain(tool => tool.Name.StartsWith(HandoffWorkflowBuilder.FunctionPrefix, StringComparison.Ordinal), - because: "Handoff orchestration should inject handoff tools."); - } +internal sealed class OptionValidatingChatClient(string baseInstructions, string handoffInstructions, AITool baseTool) : IChatClient +{ + public void Dispose() + { + } - private List ResponseMessages => - [ - new ChatMessage(ChatRole.Assistant, "Ok") + private void CheckOptions(ChatOptions? 
options) + { + options.Should().NotBeNull(); + + options.Instructions.Should().NotBeNullOrEmpty("Handoff orchestration should preserve and augment instructions.") + .And.Contain(baseInstructions, because: "Handoff orchestration should preserve existing instructions.") + .And.Contain(handoffInstructions, because: "Handoff orchestration should inject handoff instructions."); + + options.Tools.Should().NotBeNullOrEmpty("Handoff orchestration should preserve and augment tools.") + .And.Contain(tool => tool.Name == baseTool.Name, "Handoff orchestration should preserve existing tools.") + .And.Contain(tool => tool.Name.StartsWith(HandoffWorkflowBuilder.FunctionPrefix, StringComparison.Ordinal), + because: "Handoff orchestration should inject handoff tools."); + } + + private List ResponseMessages => + [ + new ChatMessage(ChatRole.Assistant, "Ok") { MessageId = Guid.NewGuid().ToString(), AuthorName = nameof(OptionValidatingChatClient) } - ]; + ]; - public Task GetResponseAsync(IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) + public Task GetResponseAsync(IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) + { + this.CheckOptions(options); + + ChatResponse response = new(this.ResponseMessages) { - this.CheckOptions(options); + ResponseId = Guid.NewGuid().ToString("N"), + CreatedAt = DateTimeOffset.Now + }; - ChatResponse response = new(this.ResponseMessages) + return Task.FromResult(response); + } + + public object? GetService(Type serviceType, object? serviceKey = null) + { + if (serviceType == typeof(OptionValidatingChatClient)) + { + return this; + } + + return null; + } + + public async IAsyncEnumerable GetStreamingResponseAsync(IEnumerable messages, ChatOptions? 
options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + this.CheckOptions(options); + + string responseId = Guid.NewGuid().ToString("N"); + foreach (ChatMessage message in this.ResponseMessages) + { + yield return new(message.Role, message.Contents) { - ResponseId = Guid.NewGuid().ToString("N"), + ResponseId = responseId, + MessageId = message.MessageId, CreatedAt = DateTimeOffset.Now }; - - return Task.FromResult(response); - } - - public object? GetService(Type serviceType, object? serviceKey = null) - { - if (serviceType == typeof(OptionValidatingChatClient)) - { - return this; - } - - return null; - } - - public async IAsyncEnumerable GetStreamingResponseAsync(IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - this.CheckOptions(options); - - string responseId = Guid.NewGuid().ToString("N"); - foreach (ChatMessage message in this.ResponseMessages) - { - yield return new(message.Role, message.Contents) - { - ResponseId = responseId, - MessageId = message.MessageId, - CreatedAt = DateTimeOffset.Now - }; - } } } } diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Sample/12_HandOff_HostAsAgent.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Sample/12_HandOff_HostAsAgent.cs index 993a6d462b..dc1072aa72 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Sample/12_HandOff_HostAsAgent.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Sample/12_HandOff_HostAsAgent.cs @@ -73,6 +73,7 @@ internal static class Step12EntryPoint foreach (string input in inputs) { AgentResponse response; + ResponseContinuationToken? 
continuationToken = null; do { diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/SampleSmokeTest.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/SampleSmokeTest.cs index 247499b72e..a290948ae4 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/SampleSmokeTest.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/SampleSmokeTest.cs @@ -314,6 +314,38 @@ public class SampleSmokeTest Action CreateValidator(string expected) => actual => actual.Should().Be(expected); } + public class Step12ExpectedOutputCalculator(int agentCount) + { + private readonly int[] _bookmarks = new int[agentCount]; + private readonly List _history = new(); + private readonly HashSet _skipIndices = new(); + + public IEnumerable ExpectedOutputs => + this._history.Where((element, index) => !this._skipIndices.Contains(index)); + + public void ProcessInput(string newInput) + { + this._skipIndices.Add(this._history.Count); + this._history.Add(newInput); + + for (int i = 0; i < agentCount; i++) + { + int agentId = i + 1; + int agentBookmark = this._bookmarks[i]; + int count = this._history.Count - agentBookmark; + + count.Should().BeGreaterThanOrEqualTo(0); + + foreach (string input in this._history.Skip(agentBookmark).ToList()) + { + this._history.Add($"{agentId}:{input}"); + } + + this._bookmarks[i] = this._history.Count; + } + } + } + [Theory] [InlineData(ExecutionEnvironment.InProcess_Lockstep)] [InlineData(ExecutionEnvironment.InProcess_OffThread)] @@ -322,14 +354,10 @@ public class SampleSmokeTest { List inputs = ["1", "2", "3"]; - using StringWriter writer = new(); - await Step12EntryPoint.RunAsync(writer, environment.ToWorkflowExecutionEnvironment(), inputs); - - string[] lines = writer.ToString().Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries); - // The expectation is that each agent will echo each input along with every echo from previous agents // E.g.: // (user): 1 + // ----- outputs below // (a1): 1:1 // (a2): 2:1 // (a2): 2:1:1 
@@ -340,7 +368,35 @@ public class SampleSmokeTest // (a3): 3:2:1 // (a3): 3:2:1:1 - string[] expected = inputs.SelectMany(input => EchoesForInput(input)).ToArray(); + // If there are multiple inputs (there are), then each successive input adds to the depth of the previous + // ones, so, for example, once we do input = "1", "2": + + // (user): 1 + // (a1): 1:1 <- a1 "last seen" + // (a2): 2:1 + // (a2): 2:1:1 <- a2 "last seen" + // (user): 2 + // ----- outputs below + // (a1): 1:2:1 + // (a1): 1:2:1:1 + // (a1): 1:2 <- from user input, a1 "last seen" + // (a2): 2:2 <- from user input (note that a2 seems like it is seeing these in a different "order" than a1 - but it is not) + // (a2): 2:1:2:1 + // (a2): 2:1:2:1:1 + // (a2): 2:1:2 <- from a1's first echo, a2 "last seen" + + Step12ExpectedOutputCalculator outputGenerator = new(Step12EntryPoint.AgentCount); + foreach (string input in inputs) + { + outputGenerator.ProcessInput(input); + } + + string[] expected = outputGenerator.ExpectedOutputs.ToArray(); + + using StringWriter writer = new(); + await Step12EntryPoint.RunAsync(writer, environment.ToWorkflowExecutionEnvironment(), inputs); + + string[] lines = writer.ToString().Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries); Console.Error.WriteLine("Expected lines: "); foreach (string expectedLine in expected) @@ -357,19 +413,6 @@ public class SampleSmokeTest Assert.Collection(lines, expected.Select(CreateValidator).ToArray()); - IEnumerable EchoesForInput(string input) - { - List echoes = [$"{Step12EntryPoint.EchoPrefixForAgent(1)}{input}"]; - for (int i = 2; i <= Step12EntryPoint.AgentCount; i++) - { - string agentPrefix = Step12EntryPoint.EchoPrefixForAgent(i); - List newEchoes = [$"{agentPrefix}{input}", .. 
echoes.Select(echo => $"{agentPrefix}{echo}")]; - echoes.AddRange(newEchoes); - } - - return echoes; - } - Action CreateValidator(string expected) => actual => actual.Should().Be(expected); } diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/TestRunContext.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/TestRunContext.cs index f94c463d59..be0d62528d 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/TestRunContext.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/TestRunContext.cs @@ -27,6 +27,9 @@ public class TestRunContext : IRunnerContext internal TestRunContext ConfigureExecutor(Executor executor, EdgeMap? map = null) { + // Ensure that we have run the ProtocolBuilder + _ = executor.Protocol.Describe(); + executor.AttachRequestContext(new TestExternalRequestContext(this, executor.Id, map)); this.Executors.Add(executor.Id, executor); return this; @@ -42,6 +45,7 @@ public class TestRunContext : IRunnerContext return this; } + internal StateManager StateManager { get; } = new(); private sealed class BoundContext( string executorId, TestRunContext runnerContext, @@ -70,16 +74,16 @@ public class TestRunContext : IRunnerContext => this.AddEventAsync(new RequestHaltEvent()); public ValueTask QueueClearScopeAsync(string? scopeName = null, CancellationToken cancellationToken = default) - => default; + => runnerContext.StateManager.ClearStateAsync(executorId, scopeName); public ValueTask QueueStateUpdateAsync(string key, T? value, string? scopeName = null, CancellationToken cancellationToken = default) - => default; + => runnerContext.StateManager.WriteStateAsync(new ScopeId(executorId, scopeName), key, value); public ValueTask ReadStateAsync(string key, string? scopeName = null, CancellationToken cancellationToken = default) - => new(default(T?)); + => runnerContext.StateManager.ReadStateAsync(new ScopeId(executorId, scopeName), key); public ValueTask> ReadStateKeysAsync(string? 
scopeName = null, CancellationToken cancellationToken = default) - => new([]); + => runnerContext.StateManager.ReadKeysAsync(new ScopeId(executorId, scopeName)); public ValueTask SendMessageAsync(object message, string? targetId = null, CancellationToken cancellationToken = default) => runnerContext.SendMessageAsync(executorId, message, targetId, cancellationToken); diff --git a/python/.cspell.json b/python/.cspell.json index c5bf954d2b..d53d710352 100644 --- a/python/.cspell.json +++ b/python/.cspell.json @@ -31,6 +31,7 @@ "azuredocs", "azurefunctions", "boto", + "codeact", "contentvector", "contoso", "datamodel", @@ -46,6 +47,7 @@ "hnsw", "httpx", "huggingface", + "hyperlight", "Instrumentor", "logit", "logprobs", diff --git a/python/PACKAGE_STATUS.md b/python/PACKAGE_STATUS.md index e6b5f403ce..661cebe53a 100644 --- a/python/PACKAGE_STATUS.md +++ b/python/PACKAGE_STATUS.md @@ -33,6 +33,7 @@ Status is grouped into these buckets: | `agent-framework-foundry-local` | `python/packages/foundry_local` | `beta` | | `agent-framework-gemini` | `python/packages/gemini` | `alpha` | | `agent-framework-github-copilot` | `python/packages/github_copilot` | `beta` | +| `agent-framework-hyperlight` | `python/packages/hyperlight` | `alpha` | | `agent-framework-lab` | `python/packages/lab` | `beta` | | `agent-framework-mem0` | `python/packages/mem0` | `beta` | | `agent-framework-ollama` | `python/packages/ollama` | `beta` | diff --git a/python/packages/copilotstudio/agent_framework_copilotstudio/_agent.py b/python/packages/copilotstudio/agent_framework_copilotstudio/_agent.py index 56a9c89081..333e75470f 100644 --- a/python/packages/copilotstudio/agent_framework_copilotstudio/_agent.py +++ b/python/packages/copilotstudio/agent_framework_copilotstudio/_agent.py @@ -244,7 +244,8 @@ class CopilotStudioAgent(BaseAgent): """Non-streaming implementation of run.""" if not session: session = self.create_session() - session.service_session_id = await self._start_new_conversation() + if not 
session.service_session_id: + session.service_session_id = await self._start_new_conversation() input_messages = normalize_messages(messages) @@ -271,7 +272,8 @@ class CopilotStudioAgent(BaseAgent): nonlocal session if not session: session = self.create_session() - session.service_session_id = await self._start_new_conversation() + if not session.service_session_id: + session.service_session_id = await self._start_new_conversation() input_messages = normalize_messages(messages) diff --git a/python/packages/copilotstudio/tests/test_copilot_agent.py b/python/packages/copilotstudio/tests/test_copilot_agent.py index 77e370ab1e..49da1d7208 100644 --- a/python/packages/copilotstudio/tests/test_copilot_agent.py +++ b/python/packages/copilotstudio/tests/test_copilot_agent.py @@ -245,6 +245,47 @@ class TestCopilotStudioAgent: assert response_count == 1 assert session.service_session_id == "test-conversation-id" + async def test_run_reuses_existing_conversation( + self, mock_copilot_client: MagicMock, mock_activity: MagicMock + ) -> None: + """Test run method reuses an existing conversation ID from the session.""" + agent = CopilotStudioAgent(client=mock_copilot_client) + session = AgentSession() + session.service_session_id = "existing-conversation-id" + + mock_copilot_client.ask_question.return_value = create_async_generator([mock_activity]) + + response = await agent.run("test message", session=session) + + assert isinstance(response, AgentResponse) + assert session.service_session_id == "existing-conversation-id" + mock_copilot_client.start_conversation.assert_not_called() + mock_copilot_client.ask_question.assert_called_once_with("test message", "existing-conversation-id") + + async def test_run_streaming_reuses_existing_conversation(self, mock_copilot_client: MagicMock) -> None: + """Test run(stream=True) method reuses an existing conversation ID from the session.""" + agent = CopilotStudioAgent(client=mock_copilot_client) + session = AgentSession() + 
session.service_session_id = "existing-conversation-id" + + typing_activity = MagicMock() + typing_activity.text = "Streaming response" + typing_activity.type = "typing" + typing_activity.id = "test-typing-id" + typing_activity.from_property.name = "Test Bot" + + mock_copilot_client.ask_question.return_value = create_async_generator([typing_activity]) + + response_count = 0 + async for response in agent.run("test message", session=session, stream=True): + assert isinstance(response, AgentResponseUpdate) + response_count += 1 + + assert response_count == 1 + assert session.service_session_id == "existing-conversation-id" + mock_copilot_client.start_conversation.assert_not_called() + mock_copilot_client.ask_question.assert_called_once_with("test message", "existing-conversation-id") + async def test_run_streaming_no_typing_activity(self, mock_copilot_client: MagicMock) -> None: """Test run(stream=True) method with non-typing activity.""" agent = CopilotStudioAgent(client=mock_copilot_client) diff --git a/python/packages/core/agent_framework/_feature_stage.py b/python/packages/core/agent_framework/_feature_stage.py index 1bda62b5d3..761b7860a4 100644 --- a/python/packages/core/agent_framework/_feature_stage.py +++ b/python/packages/core/agent_framework/_feature_stage.py @@ -49,6 +49,7 @@ class ExperimentalFeature(str, Enum): EVALS = "EVALS" FILE_HISTORY = "FILE_HISTORY" SKILLS = "SKILLS" + TOOLBOXES = "TOOLBOXES" class ReleaseCandidateFeature(str, Enum): diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 47eefe8da9..5f5e91b656 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -12,6 +12,7 @@ from collections.abc import ( AsyncIterable, Awaitable, Callable, + Iterable, Mapping, Sequence, ) @@ -89,6 +90,7 @@ logger = logging.getLogger("agent_framework") DEFAULT_MAX_ITERATIONS: Final[int] = 40 DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST: 
Final[int] = 3 SHELL_TOOL_KIND_VALUE: Final[str] = "shell" +ApprovalMode: TypeAlias = Literal["always_require", "never_require"] ChatClientT = TypeVar("ChatClientT", bound="SupportsChatGetResponse[Any]") ResponseModelBoundT = TypeVar("ResponseModelBoundT", bound=BaseModel) @@ -270,7 +272,7 @@ class FunctionTool(SerializationMixin): *, name: str, description: str = "", - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -858,6 +860,15 @@ def normalize_tools( Returns: A normalized list where callable inputs are converted to ``FunctionTool`` using :func:`tool`, and existing tool objects are passed through unchanged. + + Tool-collection wrappers are flattened in two forms: + + - non-tool, non-callable iterables + - mapping-like objects that expose a ``.tools`` collection (for example + ``ToolboxVersionObject`` from azure-ai-projects) + + This lets callers write ``tools=[toolbox, my_func]`` and have the + toolbox's contents spread in alongside individual tools. """ if not tools: return [] @@ -882,6 +893,24 @@ def normalize_tools( if callable(tool_item): # type: ignore[reportUnknownArgumentType] normalized.append(tool(tool_item)) continue + # Mapping-like tool collections (for example ToolboxVersionObject) are + # not flattened by the generic Iterable branch below because they are + # also Mapping instances. If they expose a ``tools`` collection, spread + # that collection into the normalized list. + collection_tools = getattr(tool_item, "tools", None) # type: ignore[reportUnknownArgumentType] + if isinstance(collection_tools, Iterable) and not isinstance( + collection_tools, (str, bytes, bytearray, Mapping) + ): + normalized.extend(normalize_tools(list(collection_tools))) # type: ignore[reportUnknownArgumentType] + continue + # Tool-collection wrapper (e.g. 
FoundryToolbox): a non-tool, non-callable + # iterable. Flatten its contents so ``tools=[toolbox, my_func]`` works. + # Strings, mappings, and Pydantic BaseModel are excluded — BaseModel + # instances iterate over (field, value) tuples, not tools, so they + # should pass through as leaf tool specs (handled below). + if isinstance(tool_item, Iterable) and not isinstance(tool_item, (str, bytes, bytearray, Mapping, BaseModel)): + normalized.extend(normalize_tools(list(tool_item))) # type: ignore[reportUnknownArgumentType] + continue normalized.append(tool_item) # type: ignore[reportUnknownArgumentType] return normalized @@ -1033,7 +1062,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -1049,7 +1078,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -1064,7 +1093,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index 4b6c2f0401..f3ed9ad2d2 100644 --- a/python/packages/core/agent_framework/_types.py +++ 
b/python/packages/core/agent_framework/_types.py @@ -351,6 +351,8 @@ ContentType = Literal[ "image_generation_tool_result", "mcp_server_tool_call", "mcp_server_tool_result", + "search_tool_call", + "search_tool_result", "shell_tool_call", "shell_tool_result", "shell_command_output", @@ -864,6 +866,56 @@ class Content: raw_representation=raw_representation, ) + @classmethod + def from_search_tool_call( + cls: type[ContentT], + call_id: str, + *, + tool_name: str, + arguments: str | Mapping[str, Any] | None = None, + status: str | None = None, + annotations: Sequence[Annotation] | None = None, + additional_properties: MutableMapping[str, Any] | None = None, + raw_representation: Any = None, + ) -> ContentT: + """Create search tool call content.""" + return cls( + "search_tool_call", + call_id=call_id, + tool_name=tool_name, + arguments=arguments, + status=status, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + + @classmethod + def from_search_tool_result( + cls: type[ContentT], + call_id: str, + *, + tool_name: str, + result: Any = None, + items: Sequence[Content] | None = None, + status: str | None = None, + annotations: Sequence[Annotation] | None = None, + additional_properties: MutableMapping[str, Any] | None = None, + raw_representation: Any = None, + ) -> ContentT: + """Create search tool result content.""" + return cls( + "search_tool_result", + call_id=call_id, + tool_name=tool_name, + result=result, + items=list(items) if items is not None else None, + status=status, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + @classmethod def from_usage( cls: type[ContentT], @@ -1478,7 +1530,7 @@ class Content: return span.lower() == top_level_media_type.lower() def parse_arguments(self) -> dict[str, Any | None] | None: - """Parse arguments from function_call or mcp_server_tool_call content. 
+ """Parse arguments from function_call, mcp_server_tool_call, or search_tool_call content. If arguments cannot be parsed as JSON or the result is not a dict, they are returned as a dictionary with a single key "raw". diff --git a/python/packages/core/agent_framework/foundry/__init__.py b/python/packages/core/agent_framework/foundry/__init__.py index b1d2b88450..c1e47cd6b8 100644 --- a/python/packages/core/agent_framework/foundry/__init__.py +++ b/python/packages/core/agent_framework/foundry/__init__.py @@ -20,6 +20,7 @@ _IMPORTS: dict[str, tuple[str, str]] = { "FoundryEmbeddingOptions": ("agent_framework_foundry", "agent-framework-foundry"), "FoundryEmbeddingSettings": ("agent_framework_foundry", "agent-framework-foundry"), "FoundryEvals": ("agent_framework_foundry", "agent-framework-foundry"), + "FoundryHostedToolType": ("agent_framework_foundry", "agent-framework-foundry"), "FoundryMemoryProvider": ("agent_framework_foundry", "agent-framework-foundry"), "FoundryLocalChatOptions": ("agent_framework_foundry_local", "agent-framework-foundry-local"), "FoundryLocalClient": ("agent_framework_foundry_local", "agent-framework-foundry-local"), @@ -31,6 +32,9 @@ _IMPORTS: dict[str, tuple[str, str]] = { "RawFoundryEmbeddingClient": ("agent_framework_foundry", "agent-framework-foundry"), "evaluate_foundry_target": ("agent_framework_foundry", "agent-framework-foundry"), "evaluate_traces": ("agent_framework_foundry", "agent-framework-foundry"), + "get_toolbox_tool_name": ("agent_framework_foundry", "agent-framework-foundry"), + "get_toolbox_tool_type": ("agent_framework_foundry", "agent-framework-foundry"), + "select_toolbox_tools": ("agent_framework_foundry", "agent-framework-foundry"), } diff --git a/python/packages/core/agent_framework/foundry/__init__.pyi b/python/packages/core/agent_framework/foundry/__init__.pyi index 47eb92b3af..87cc7a3bda 100644 --- a/python/packages/core/agent_framework/foundry/__init__.pyi +++ 
b/python/packages/core/agent_framework/foundry/__init__.pyi @@ -12,6 +12,7 @@ from agent_framework_foundry import ( FoundryEmbeddingOptions, FoundryEmbeddingSettings, FoundryEvals, + FoundryHostedToolType, FoundryMemoryProvider, RawFoundryAgent, RawFoundryAgentChatClient, @@ -19,6 +20,9 @@ from agent_framework_foundry import ( RawFoundryEmbeddingClient, evaluate_foundry_target, evaluate_traces, + get_toolbox_tool_name, + get_toolbox_tool_type, + select_toolbox_tools, ) from agent_framework_foundry_local import ( FoundryLocalChatOptions, @@ -35,6 +39,7 @@ __all__ = [ "FoundryEmbeddingOptions", "FoundryEmbeddingSettings", "FoundryEvals", + "FoundryHostedToolType", "FoundryLocalChatOptions", "FoundryLocalClient", "FoundryLocalSettings", @@ -46,4 +51,7 @@ __all__ = [ "RawFoundryEmbeddingClient", "evaluate_foundry_target", "evaluate_traces", + "get_toolbox_tool_name", + "get_toolbox_tool_type", + "select_toolbox_tools", ] diff --git a/python/packages/core/tests/core/test_tools.py b/python/packages/core/tests/core/test_tools.py index 91ba663d84..6fa7172295 100644 --- a/python/packages/core/tests/core/test_tools.py +++ b/python/packages/core/tests/core/test_tools.py @@ -1144,3 +1144,160 @@ def test_parse_annotation_with_annotated_and_literal(): # endregion + + +# region normalize_tools flattening of tool-collection wrappers + + +def _make_flatten_function_tool(name: str) -> FunctionTool: + """Build a FunctionTool for flattening tests.""" + + @tool(name=name, description=f"{name} tool") + def _impl(x: int) -> int: + return x + + return _impl # type: ignore[return-value] + + +def test_normalize_tools_flattens_tool_collection_wrapper() -> None: + """A non-tool, non-callable iterable inside the tools list is flattened.""" + from agent_framework._tools import normalize_tools + + inner_a = _make_flatten_function_tool("inner_a") + inner_b = _make_flatten_function_tool("inner_b") + + class ToolBundle: + """Minimal stand-in for a tool-collection wrapper like FoundryToolbox.""" + + 
def __init__(self, tools: list[FunctionTool]) -> None: + self._tools = tools + + def __iter__(self): + return iter(self._tools) + + bundle = ToolBundle([inner_a, inner_b]) + + normalized = normalize_tools([bundle]) + + assert len(normalized) == 2 + assert normalized[0] is inner_a + assert normalized[1] is inner_b + + +def test_normalize_tools_combines_bundle_with_individual_tools() -> None: + """The canonical ``tools=[bundle, my_func]`` call site spreads bundle + individual.""" + from agent_framework._tools import normalize_tools + + bundled = _make_flatten_function_tool("bundled") + standalone = _make_flatten_function_tool("standalone") + + class ToolBundle: + def __init__(self, tools: list[FunctionTool]) -> None: + self._tools = tools + + def __iter__(self): + return iter(self._tools) + + normalized = normalize_tools([ToolBundle([bundled]), standalone]) + + assert len(normalized) == 2 + assert normalized[0] is bundled + assert normalized[1] is standalone + + +def test_normalize_tools_flattens_nested_bundles() -> None: + """Bundles inside bundles are flattened recursively via the recursive call.""" + from agent_framework._tools import normalize_tools + + inner = _make_flatten_function_tool("deep") + + class ToolBundle: + def __init__(self, tools: list[Any]) -> None: + self._tools = tools + + def __iter__(self): + return iter(self._tools) + + nested = ToolBundle([ToolBundle([inner])]) + + normalized = normalize_tools([nested]) + + assert len(normalized) == 1 + assert normalized[0] is inner + + +def test_normalize_tools_bundle_only_form() -> None: + """Passing a bundle directly (no outer list) also flattens its contents. + + ``tools=bundle`` — the outer wrap-in-list happens in the non-Sequence + branch, then the flattening logic kicks in on the inner pass. 
+ """ + from agent_framework._tools import normalize_tools + + a = _make_flatten_function_tool("a") + b = _make_flatten_function_tool("b") + + class ToolBundle: + def __init__(self, tools: list[FunctionTool]) -> None: + self._tools = tools + + def __iter__(self): + return iter(self._tools) + + normalized = normalize_tools(ToolBundle([a, b])) # type: ignore[arg-type] + + assert len(normalized) == 2 + assert normalized[0] is a + assert normalized[1] is b + + +def test_normalize_tools_does_not_flatten_known_tool_types() -> None: + """FunctionTool / dict / callable are detected before the flatten branch.""" + from agent_framework._tools import normalize_tools + + func_tool = _make_flatten_function_tool("ft") + dict_tool: dict[str, Any] = {"type": "code_interpreter", "container": {"type": "auto"}} + + def plain_callable(x: int) -> int: + return x + + normalized = normalize_tools([func_tool, dict_tool, plain_callable]) + + assert len(normalized) == 3 + assert normalized[0] is func_tool + assert normalized[1] is dict_tool + # plain_callable was wrapped in a FunctionTool via the @tool helper + assert isinstance(normalized[2], FunctionTool) + + +def test_normalize_tools_flattens_mapping_like_toolbox_with_tools_attr() -> None: + """Mapping-like toolbox objects with ``.tools`` should still flatten.""" + from collections.abc import Mapping as MappingABC + + from agent_framework._tools import normalize_tools + + bundled = _make_flatten_function_tool("bundled") + standalone = _make_flatten_function_tool("standalone") + + class ToolBundleMapping(MappingABC[str, Any]): + def __init__(self, tools: list[FunctionTool]) -> None: + self.tools = tools + self._data = {"name": "research_tools", "version": "v1", "tools": tools} + + def __getitem__(self, key: str) -> Any: + return self._data[key] + + def __iter__(self): + return iter(self._data) + + def __len__(self) -> int: + return len(self._data) + + normalized = normalize_tools([ToolBundleMapping([bundled]), standalone]) + + assert 
len(normalized) == 2 + assert normalized[0] is bundled + assert normalized[1] is standalone + + +# endregion diff --git a/python/packages/foundry/README.md b/python/packages/foundry/README.md index e22fb523a5..26f9a6e309 100644 --- a/python/packages/foundry/README.md +++ b/python/packages/foundry/README.md @@ -1,3 +1,66 @@ # Agent Framework Foundry This package contains the Microsoft Foundry integrations for Microsoft Agent Framework, including Foundry chat clients, preconfigured Foundry agents, Foundry embedding clients, and Foundry memory providers. + +## Toolboxes + +A *toolbox* is a named, versioned bundle of hosted tool configurations — code interpreter, file search, image generation, MCP, web search, and so on — stored inside a Microsoft Foundry project. Toolboxes let you manage tool configuration once and reuse it across agents. + +### Authoring a toolbox + +Toolboxes can be authored two ways: + +- **Foundry portal** — create and version toolboxes through the UI without touching code. +- **Programmatically** — use the [`azure-ai-projects`](https://pypi.org/project/azure-ai-projects/) SDK to create, update, and version toolboxes from Python. + +> Toolbox authoring APIs (`ToolboxVersionObject`, `ToolboxObject`, `project_client.beta.toolboxes.*`) require `azure-ai-projects>=2.1.0`. Earlier versions can only consume toolboxes that already exist. + +### Using toolboxes with `FoundryAgent` + +For hosted `FoundryAgent`, the toolbox must already be attached to the agent in the Microsoft Foundry project. Once attached, the agent invokes its toolbox tools transparently — no client-side wiring required — and you interact with the agent the same way you would with any other tool-equipped Foundry agent. + +### Using toolboxes with `FoundryChatClient` + +There are two patterns for wiring a toolbox into a `FoundryChatClient`-backed agent. + +**1. 
Fetch, optionally filter, and pass the tools directly** + +Load the toolbox from the Microsoft Foundry project, optionally select a subset of its tools, and hand them to an `Agent` alongside any other tools you own: + +```python +from agent_framework import Agent +from agent_framework.foundry import FoundryChatClient, select_toolbox_tools + +client = FoundryChatClient(...) +toolbox = await client.get_toolbox("my-toolbox", version="3") + +# Pass the whole toolbox: +agent = Agent(client=client, tools=toolbox) + +# Or filter to a subset first: +selected = select_toolbox_tools(toolbox, include_types=["code_interpreter", "mcp"]) +agent = Agent(client=client, tools=selected) +``` + +See [`foundry_chat_client_with_toolbox.py`](../../samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py) for a full example, including combining multiple toolboxes. + +**2. Connect to the toolbox's MCP endpoint with `MCPStreamableHTTPTool`** + +Each toolbox is reachable as an MCP server. Instead of fetching and fanning out its individual tool definitions, you can point a MAF `MCPStreamableHTTPTool` at the toolbox's MCP endpoint — the agent then discovers and calls its tools over MCP at runtime: + +```python +from agent_framework import Agent, MCPStreamableHTTPTool +from agent_framework.foundry import FoundryChatClient + +async with Agent( + client=FoundryChatClient(...), + instructions="You are a helpful assistant. 
Use the toolbox tools when useful.", + tools=MCPStreamableHTTPTool( + name="my_toolbox", + description="Tools served by my Foundry toolbox", + url="https://", + ), +) as agent: + result = await agent.run("What tools are available?") + print(result.text) +``` diff --git a/python/packages/foundry/agent_framework_foundry/__init__.py b/python/packages/foundry/agent_framework_foundry/__init__.py index fbd1376735..b70d1720f2 100644 --- a/python/packages/foundry/agent_framework_foundry/__init__.py +++ b/python/packages/foundry/agent_framework_foundry/__init__.py @@ -16,6 +16,7 @@ from ._foundry_evals import ( evaluate_traces, ) from ._memory_provider import FoundryMemoryProvider +from ._tools import FoundryHostedToolType, get_toolbox_tool_name, get_toolbox_tool_type, select_toolbox_tools try: __version__ = importlib.metadata.version(__name__) @@ -30,6 +31,7 @@ __all__ = [ "FoundryEmbeddingOptions", "FoundryEmbeddingSettings", "FoundryEvals", + "FoundryHostedToolType", "FoundryMemoryProvider", "RawFoundryAgent", "RawFoundryAgentChatClient", @@ -38,4 +40,7 @@ __all__ = [ "__version__", "evaluate_foundry_target", "evaluate_traces", + "get_toolbox_tool_name", + "get_toolbox_tool_type", + "select_toolbox_tools", ] diff --git a/python/packages/foundry/agent_framework_foundry/_agent.py b/python/packages/foundry/agent_framework_foundry/_agent.py index bf5d936d9d..0c7f93ba1f 100644 --- a/python/packages/foundry/agent_framework_foundry/_agent.py +++ b/python/packages/foundry/agent_framework_foundry/_agent.py @@ -34,6 +34,8 @@ from azure.ai.projects.aio import AIProjectClient from azure.core.credentials import TokenCredential from azure.core.credentials_async import AsyncTokenCredential +from ._tools import sanitize_foundry_response_tool + if sys.version_info >= (3, 13): from typing import TypeVar # type: ignore # pragma: no cover else: @@ -307,6 +309,20 @@ class RawFoundryAgentChatClient( # type: ignore[misc] """Skip model check — model is configured on the Foundry agent.""" pass + 
@override + def _prepare_tools_for_openai( + self, + tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None, + ) -> list[Any]: + """Prepare tools for Foundry agent Responses API calls. + + Mirrors ``RawFoundryChatClient`` sanitization so toolbox-fetched MCP + tools with extra read-model fields continue to work through the agent + surface. + """ + response_tools = super()._prepare_tools_for_openai(tools) + return [sanitize_foundry_response_tool(tool_item) for tool_item in response_tools] + def _prepare_messages_for_azure_ai(self, messages: Sequence[Message]) -> tuple[list[Message], str | None]: """Extract system/developer messages as instructions for Azure AI. diff --git a/python/packages/foundry/agent_framework_foundry/_chat_client.py b/python/packages/foundry/agent_framework_foundry/_chat_client.py index d9e2483fbe..7c9eb3a68c 100644 --- a/python/packages/foundry/agent_framework_foundry/_chat_client.py +++ b/python/packages/foundry/agent_framework_foundry/_chat_client.py @@ -16,6 +16,7 @@ from agent_framework import ( load_settings, ) from agent_framework._compaction import CompactionStrategy, TokenizerProtocol +from agent_framework._feature_stage import ExperimentalFeature, experimental from agent_framework.observability import ChatTelemetryLayer from agent_framework_openai._chat_client import OpenAIChatOptions, RawOpenAIChatClient from azure.ai.projects.aio import AIProjectClient @@ -32,6 +33,8 @@ from azure.ai.projects.models import MCPTool as FoundryMCPTool from azure.core.credentials import TokenCredential from azure.core.credentials_async import AsyncTokenCredential +from ._tools import fetch_toolbox, sanitize_foundry_response_tool + if sys.version_info >= (3, 13): from typing import TypeVar # type: ignore # pragma: no cover else: @@ -46,7 +49,8 @@ else: from typing_extensions import TypedDict # type: ignore # pragma: no cover if TYPE_CHECKING: - from agent_framework import ChatAndFunctionMiddlewareTypes + from 
agent_framework import ChatAndFunctionMiddlewareTypes, ToolTypes + from azure.ai.projects.models import ToolboxVersionObject logger: logging.Logger = logging.getLogger("agent_framework.foundry") @@ -218,6 +222,21 @@ class RawFoundryChatClient( # type: ignore[misc] raise ValueError("model must be a non-empty string") options["model"] = self.model + @override + def _prepare_tools_for_openai( + self, + tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None, + ) -> list[Any]: + """Prepare tools for Foundry Responses API calls. + + Foundry toolbox reads can surface MCP tool objects with extra fields + (for example ``name``) that are accepted by the toolbox API but rejected + by the Responses API. Sanitize those hosted-tool payloads before sending + them downstream. + """ + response_tools = super()._prepare_tools_for_openai(tools) + return [sanitize_foundry_response_tool(tool_item) for tool_item in response_tools] + async def configure_azure_monitor( self, enable_sensitive_data: bool = False, @@ -460,6 +479,37 @@ class RawFoundryChatClient( # type: ignore[misc] # endregion + # region Toolbox methods (instance methods — these hit the network) + + @experimental(feature_id=ExperimentalFeature.TOOLBOXES) + async def get_toolbox( + self, + name: str, + *, + version: str | None = None, + ) -> ToolboxVersionObject: + """Fetch a Foundry toolbox by name. + + If ``version`` is omitted, resolves the toolbox's current default version + (two requests). If ``version`` is specified, fetches that version directly + (single request). + + Args: + name: The name of the toolbox. + + Keyword Args: + version: Optional immutable version identifier to pin to. + + Returns: + A ``ToolboxVersionObject``. Pass its ``tools`` attribute to + ``Agent(tools=toolbox.tools)``. + + Raises: + azure.core.exceptions.ResourceNotFoundError: If the toolbox or + the requested version does not exist. 
+ """ + return await fetch_toolbox(self.project_client, name, version) + class FoundryChatClient( # type: ignore[misc] FunctionInvocationLayer[FoundryChatOptionsT], diff --git a/python/packages/foundry/agent_framework_foundry/_tools.py b/python/packages/foundry/agent_framework_foundry/_tools.py new file mode 100644 index 0000000000..3c22872e18 --- /dev/null +++ b/python/packages/foundry/agent_framework_foundry/_tools.py @@ -0,0 +1,166 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Shared tool helpers for Foundry chat clients. + +Includes: + +* *Toolbox* helpers — a *toolbox* is a named, versioned bundle of tool + definitions stored in an Azure AI Foundry project. +* Responses-API payload sanitization for Foundry hosted tools. +""" + +from __future__ import annotations + +from collections.abc import Callable, Collection, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Literal, TypeAlias, cast + +from agent_framework._feature_stage import ExperimentalFeature, experimental +from azure.ai.projects.models import MCPTool as FoundryMCPTool + +if TYPE_CHECKING: + from azure.ai.projects.aio import AIProjectClient + from azure.ai.projects.models import Tool, ToolboxVersionObject + +FoundryHostedToolType: TypeAlias = ( + Literal[ + "code_interpreter", + "file_search", + "image_generation", + "mcp", + "web_search", + ] + | str +) +ToolboxToolSelectionInput: TypeAlias = "ToolboxVersionObject | Sequence[Tool | dict[str, Any]]" + + +@experimental(feature_id=ExperimentalFeature.TOOLBOXES) +async def fetch_toolbox( + project_client: AIProjectClient, + name: str, + version: str | None = None, +) -> ToolboxVersionObject: + """Fetch a toolbox version via an ``AIProjectClient``. + + If ``version`` is omitted, resolves the toolbox's current default + version (two requests: one to ``.get(name)`` for the default version + pointer, one to ``.get_version(name, version)`` for the tools). If + ``version`` is specified, fetches that version directly (single request). 
+ """ + if version is None: + handle = await project_client.beta.toolboxes.get(name) + version = handle.default_version + return await project_client.beta.toolboxes.get_version(name, version) + + +@experimental(feature_id=ExperimentalFeature.TOOLBOXES) +def get_toolbox_tool_name(tool: Tool | dict[str, Any]) -> str | None: + """Return the best-effort display/selection name for a toolbox tool. + + Selection precedence: + 1. MCP ``server_label`` + 2. Generic tool ``name`` + 3. Tool ``type`` + """ + if isinstance(tool, dict): + if server_label := tool.get("server_label"): + return str(server_label) + if name := tool.get("name"): + return str(name) + if tool_type := tool.get("type"): + return str(tool_type) + return None + + if server_label := getattr(tool, "server_label", None): + return str(server_label) + if name := getattr(tool, "name", None): + return str(name) + if tool_type := getattr(tool, "type", None): + return str(tool_type) + return None + + +@experimental(feature_id=ExperimentalFeature.TOOLBOXES) +def get_toolbox_tool_type(tool: Tool | dict[str, Any]) -> str | None: + """Return the raw tool ``type`` if present.""" + tool_type = tool.get("type") if isinstance(tool, dict) else getattr(tool, "type", None) + return str(tool_type) if tool_type is not None else None + + +@experimental(feature_id=ExperimentalFeature.TOOLBOXES) +def select_toolbox_tools( + tools: ToolboxToolSelectionInput, + *, + include_names: Collection[str] | None = None, + exclude_names: Collection[str] | None = None, + include_types: Collection[FoundryHostedToolType] | None = None, + exclude_types: Collection[FoundryHostedToolType] | None = None, + predicate: Callable[[Tool | dict[str, Any]], bool] | None = None, +) -> list[Tool | dict[str, Any]]: + """Filter toolbox tools by normalized name, raw type, and/or predicate. + + Normalized name precedence: + 1. ``server_label`` for MCP tools + 2. ``name`` + 3. 
``type`` + """ + tool_items: Sequence[Tool | dict[str, Any]] = ( + tools if isinstance(tools, Sequence) else cast("Sequence[Tool | dict[str, Any]]", tools.tools) + ) + include_name_set = {str(item) for item in include_names} if include_names is not None else None + exclude_name_set = {str(item) for item in exclude_names} if exclude_names is not None else None + include_type_set = {str(item) for item in include_types} if include_types is not None else None + exclude_type_set = {str(item) for item in exclude_types} if exclude_types is not None else None + + selected: list[Tool | dict[str, Any]] = [] + for tool in tool_items: + tool_name = get_toolbox_tool_name(tool) + tool_type = get_toolbox_tool_type(tool) + + if include_name_set is not None and tool_name not in include_name_set: + continue + if exclude_name_set is not None and tool_name in exclude_name_set: + continue + if include_type_set is not None and tool_type not in include_type_set: + continue + if exclude_type_set is not None and tool_type in exclude_type_set: + continue + if predicate is not None and not predicate(tool): + continue + + selected.append(tool) + + return selected + + +@experimental(feature_id=ExperimentalFeature.TOOLBOXES) +def sanitize_foundry_response_tool(tool_item: Any) -> Any: + """Return a Responses-API-safe tool payload for Foundry hosted tools. + + Azure AI Projects toolbox reads can currently return hosted tool objects with + extra read-model decoration fields such as top-level ``name`` and + ``description``. Azure AI Foundry rejects at least ``name`` on Responses API + requests with: + + ``Unknown parameter: 'tools[0].name'``. + + We defensively strip these decoration fields for non-function hosted tools so + the round-trip + ``toolbox.tools -> Agent(..., tools=...) -> run()`` works, while the Azure + SDK/service behavior is corrected upstream. 
+ """ + if isinstance(tool_item, FoundryMCPTool): + sanitized: dict[str, Any] = dict(cast("Mapping[str, Any]", tool_item)) + sanitized.pop("name", None) + sanitized.pop("description", None) + return sanitized + + if isinstance(tool_item, Mapping): + mapping = cast("Mapping[str, Any]", tool_item) + if "type" in mapping and mapping.get("type") not in {"function", "custom"}: + sanitized = dict(mapping) + sanitized.pop("name", None) + sanitized.pop("description", None) + return sanitized + + return cast(Any, tool_item) diff --git a/python/packages/foundry/pyproject.toml b/python/packages/foundry/pyproject.toml index 69d58ee3e5..67feb98c98 100644 --- a/python/packages/foundry/pyproject.toml +++ b/python/packages/foundry/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "agent-framework-core>=1.0.1,<2", "agent-framework-openai>=1.0.1,<2", "azure-ai-inference>=1.0.0b9,<1.0.0b10", - "azure-ai-projects>=2.0.0,<3.0", + "azure-ai-projects>=2.1.0,<3.0", ] [tool.uv] diff --git a/python/packages/foundry/tests/foundry/test_foundry_chat_client.py b/python/packages/foundry/tests/foundry/test_foundry_chat_client.py index 40fc06d3ef..a7c5beb822 100644 --- a/python/packages/foundry/tests/foundry/test_foundry_chat_client.py +++ b/python/packages/foundry/tests/foundry/test_foundry_chat_client.py @@ -15,6 +15,7 @@ from agent_framework import ChatResponse, Content, Message, SupportsChatGetRespo from agent_framework._telemetry import AGENT_FRAMEWORK_USER_AGENT from agent_framework.exceptions import ChatClientException, ChatClientInvalidRequestException from agent_framework_openai import OpenAIContentFilterException +from azure.ai.projects.models import MCPTool as FoundryMCPTool from azure.core.exceptions import ResourceNotFoundError from azure.identity import AzureCliCredential from openai import BadRequestError @@ -608,6 +609,82 @@ def test_get_mcp_tool_with_project_connection_id() -> None: assert tool_config["server_label"] == "Docs_MCP" +def 
test_prepare_tools_for_openai_strips_extraneous_name_from_foundry_mcp_tool() -> None: + """Toolbox-returned MCP tools may carry ``name``; Foundry Responses rejects it.""" + project_client = MagicMock() + project_client.get_openai_client.return_value = _make_mock_openai_client() + client = FoundryChatClient(project_client=project_client, model="test-model") + + tool = FoundryMCPTool( + server_label="githubmcp", + server_url="https://api.githubcopilot.com/mcp", + ) + tool["project_connection_id"] = "githubmcp" + tool["name"] = "githubmcp" + + response_tools = client._prepare_tools_for_openai([tool]) + + assert len(response_tools) == 1 + prepared = response_tools[0] + assert prepared["type"] == "mcp" + assert prepared["server_label"] == "githubmcp" + assert prepared["project_connection_id"] == "githubmcp" + assert "name" not in prepared + + +def test_prepare_tools_for_openai_strips_read_model_fields_from_toolbox_code_interpreter() -> None: + """Toolbox-returned code interpreter tools may carry read-model-only name/description.""" + project_client = MagicMock() + project_client.get_openai_client.return_value = _make_mock_openai_client() + client = FoundryChatClient(project_client=project_client, model="test-model") + + tool = { + "type": "code_interpreter", + "name": "code_interpreter_t6bbtm", + "description": "Toolbox read model description", + "container": {"file_ids": [], "type": "auto"}, + } + + response_tools = client._prepare_tools_for_openai([tool]) + + assert len(response_tools) == 1 + prepared = response_tools[0] + assert prepared["type"] == "code_interpreter" + assert prepared["container"] == {"file_ids": [], "type": "auto"} + assert "name" not in prepared + assert "description" not in prepared + + +def test_prepare_tools_for_openai_strips_name_from_non_function_hosted_tool_dicts() -> None: + """All non-function hosted tool payloads should drop top-level read-model names.""" + project_client = MagicMock() + project_client.get_openai_client.return_value = 
_make_mock_openai_client() + client = FoundryChatClient(project_client=project_client, model="test-model") + + response_tools = client._prepare_tools_for_openai([ + { + "type": "file_search", + "name": "file_search_tool_123", + "description": "toolbox decoration", + "vector_store_ids": ["vs_123"], + }, + { + "type": "web_search", + "name": "web_search_tool_456", + "description": "toolbox decoration", + }, + ]) + + assert len(response_tools) == 2 + assert response_tools[0]["type"] == "file_search" + assert response_tools[0]["vector_store_ids"] == ["vs_123"] + assert "name" not in response_tools[0] + assert "description" not in response_tools[0] + assert response_tools[1]["type"] == "web_search" + assert "name" not in response_tools[1] + assert "description" not in response_tools[1] + + @pytest.mark.flaky @pytest.mark.integration @skip_if_foundry_integration_tests_disabled diff --git a/python/packages/foundry/tests/test_toolbox.py b/python/packages/foundry/tests/test_toolbox.py new file mode 100644 index 0000000000..1933084e10 --- /dev/null +++ b/python/packages/foundry/tests/test_toolbox.py @@ -0,0 +1,435 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Unit tests for toolbox helpers on FoundryChatClient. + +Return types are the raw azure-ai-projects SDK models (ToolboxVersionObject, +ToolboxObject) — no custom wrapper. Tests verify the chat-client get path and +tool-selection ergonomics. 
+""" + +from __future__ import annotations + +import datetime as dt +import os +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +try: + from azure.ai.projects.models import ( + AutoCodeInterpreterToolParam, + CodeInterpreterTool, + Tool, + ToolboxObject, + ToolboxVersionObject, + ) +except ImportError: + pytest.skip( + "Toolbox types require azure-ai-projects>=2.1.0 (unreleased).", + allow_module_level=True, + ) + +from azure.core.exceptions import ResourceNotFoundError +from azure.identity import AzureCliCredential + +# --------------------------------------------------------------------------- # +# Helpers # +# --------------------------------------------------------------------------- # + + +class _AsyncIter: + """Minimal async-iterable for mocking ``AsyncItemPaged`` in tests.""" + + def __init__(self, items: list[Any]) -> None: + self._items = items + + def __aiter__(self) -> _AsyncIter: + self._iter = iter(self._items) + return self + + async def __anext__(self) -> Any: + try: + return next(self._iter) + except StopIteration: + raise StopAsyncIteration from None + + +def _make_code_interpreter() -> CodeInterpreterTool: + return CodeInterpreterTool(container=AutoCodeInterpreterToolParam()) + + +def _make_version_object( + *, + name: str = "research_tools", + version: str = "v1", + tools: list[Tool] | None = None, + description: str | None = None, +) -> ToolboxVersionObject: + return ToolboxVersionObject( + id=f"tbv_{name}_{version}", + name=name, + version=version, + metadata={}, + created_at=dt.datetime(2026, 4, 10, tzinfo=dt.timezone.utc), + tools=tools if tools is not None else [_make_code_interpreter()], + description=description, + ) + + +def _make_mock_foundry_client(*, project_client: MagicMock) -> Any: + """Build a FoundryChatClient wired to a mock project_client.""" + from agent_framework_foundry import FoundryChatClient + + project_client.get_openai_client = MagicMock(return_value=MagicMock()) + return 
FoundryChatClient(project_client=project_client, model="test-model") + + +# --------------------------------------------------------------------------- # +# get_toolbox — explicit version path # +# --------------------------------------------------------------------------- # + + +async def test_get_toolbox_with_explicit_version_makes_single_request() -> None: + project_client = MagicMock() + version_obj = _make_version_object(name="research_tools", version="v3") + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + project_client.beta.toolboxes.get = AsyncMock( + side_effect=AssertionError("get() must not be called when version is explicit") + ) + + client = _make_mock_foundry_client(project_client=project_client) + + toolbox = await client.get_toolbox("research_tools", version="v3") + + assert isinstance(toolbox, ToolboxVersionObject) + assert toolbox.name == "research_tools" + assert toolbox.version == "v3" + project_client.beta.toolboxes.get_version.assert_awaited_once_with("research_tools", "v3") + project_client.beta.toolboxes.get.assert_not_called() + + +# --------------------------------------------------------------------------- # +# get_toolbox — default-version path + error + passthrough + smoke # +# --------------------------------------------------------------------------- # + + +async def test_get_toolbox_default_version_resolves_then_fetches() -> None: + project_client = MagicMock() + handle = ToolboxObject(id="tb_1", name="research_tools", default_version="v5") + version_obj = _make_version_object(name="research_tools", version="v5") + + project_client.beta.toolboxes.get = AsyncMock(return_value=handle) + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + + client = _make_mock_foundry_client(project_client=project_client) + + toolbox = await client.get_toolbox("research_tools") + + assert toolbox.version == "v5" + project_client.beta.toolboxes.get.assert_awaited_once_with("research_tools") 
+ project_client.beta.toolboxes.get_version.assert_awaited_once_with("research_tools", "v5") + + +async def test_get_toolbox_propagates_resource_not_found() -> None: + project_client = MagicMock() + project_client.beta.toolboxes.get = AsyncMock(side_effect=ResourceNotFoundError("no such toolbox")) + + client = _make_mock_foundry_client(project_client=project_client) + + with pytest.raises(ResourceNotFoundError): + await client.get_toolbox("missing_toolbox") + + +async def test_get_toolbox_tool_passthrough_preserves_heterogeneous_types() -> None: + """Ensure all Tool subclasses pass through unchanged — critical for MCP tools + with project_connection_id, which must reach the runtime untouched.""" + from azure.ai.projects.models import MCPTool as FoundryMCPTool + + mcp_tool = FoundryMCPTool( + server_label="github_oauth", + server_url="https://api.githubcopilot.com/mcp", + ) + mcp_tool["project_connection_id"] = "conn_abc" + + project_client = MagicMock() + version_obj = _make_version_object( + name="mixed", + version="v1", + tools=[_make_code_interpreter(), mcp_tool], + ) + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + + client = _make_mock_foundry_client(project_client=project_client) + + toolbox = await client.get_toolbox("mixed", version="v1") + + assert len(toolbox.tools) == 2 + assert isinstance(toolbox.tools[0], CodeInterpreterTool) + assert isinstance(toolbox.tools[1], FoundryMCPTool) + assert toolbox.tools[1]["project_connection_id"] == "conn_abc" + + +async def test_toolbox_tools_can_be_passed_to_agent() -> None: + """Integration smoke: toolbox.tools can be passed directly to Agent(tools=...) 
.""" + from agent_framework import Agent + + project_client = MagicMock() + version_obj = _make_version_object(name="research_tools", version="v1", tools=[_make_code_interpreter()]) + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + + client = _make_mock_foundry_client(project_client=project_client) + + toolbox = await client.get_toolbox("research_tools", version="v1") + + agent = Agent( + client=client, + instructions="You are a test agent.", + tools=toolbox.tools, + ) + + agent_tools = agent.default_options["tools"] + assert len(agent_tools) == 1 + assert agent_tools[0]["type"] == "code_interpreter" + + +async def test_multiple_toolbox_tool_lists_can_be_combined_in_agent() -> None: + """Nested toolbox ``.tools`` lists flatten into one tool list on Agent construction.""" + from agent_framework import Agent + + project_client = MagicMock() + project_client.get_openai_client = MagicMock(return_value=MagicMock()) + client = _make_mock_foundry_client(project_client=project_client) + + toolbox_a = _make_version_object(name="research_tools", version="v1", tools=[_make_code_interpreter()]) + toolbox_b = _make_version_object(name="some_other_tools", version="v3", tools=[_make_code_interpreter()]) + + agent = Agent( + client=client, + instructions="You are a test agent.", + tools=[toolbox_a.tools, toolbox_b.tools], + ) + + agent_tools = agent.default_options["tools"] + assert len(agent_tools) == 2 + assert agent_tools[0]["type"] == "code_interpreter" + assert agent_tools[1]["type"] == "code_interpreter" + + +# --------------------------------------------------------------------------- # +# toolbox tool selection helpers # +# --------------------------------------------------------------------------- # + + +def test_get_toolbox_tool_name_prefers_server_label_then_name_then_type() -> None: + from azure.ai.projects.models import MCPTool as FoundryMCPTool + + from agent_framework_foundry import get_toolbox_tool_name + + mcp_tool = 
FoundryMCPTool( + server_label="githubmcp", + server_url="https://api.githubcopilot.com/mcp", + ) + assert get_toolbox_tool_name(mcp_tool) == "githubmcp" + + named_tool = {"type": "code_interpreter", "name": "ci_tool"} + assert get_toolbox_tool_name(named_tool) == "ci_tool" + + unnamed_tool = {"type": "web_search"} + assert get_toolbox_tool_name(unnamed_tool) == "web_search" + + +def test_select_toolbox_tools_filters_by_names() -> None: + from azure.ai.projects.models import MCPTool as FoundryMCPTool + + from agent_framework_foundry import select_toolbox_tools + + tools: list[Tool | dict[str, Any]] = [ + FoundryMCPTool(server_label="githubmcp", server_url="https://api.githubcopilot.com/mcp"), + {"type": "code_interpreter", "name": "python_runner"}, + {"type": "web_search"}, + ] + + selected = select_toolbox_tools(tools, include_names=["githubmcp", "python_runner"]) + + assert len(selected) == 2 + assert selected[0] is tools[0] + assert selected[1] is tools[1] + + +def test_select_toolbox_tools_filters_by_typed_tool_types() -> None: + from agent_framework_foundry import select_toolbox_tools + + tools: list[Tool | dict[str, Any]] = [ + {"type": "mcp", "server_label": "githubmcp"}, + {"type": "code_interpreter", "name": "python_runner"}, + {"type": "web_search"}, + ] + + selected = select_toolbox_tools(tools, include_types=["mcp", "code_interpreter"]) + + assert len(selected) == 2 + assert selected[0]["type"] == "mcp" + assert selected[1]["type"] == "code_interpreter" + + +def test_select_toolbox_tools_accepts_toolbox_object_directly() -> None: + from agent_framework_foundry import select_toolbox_tools + + toolbox = _make_version_object( + name="research_tools", + version="v1", + tools=[ + {"type": "mcp", "server_label": "githubmcp"}, # type: ignore[list-item] + {"type": "code_interpreter", "name": "python_runner"}, # type: ignore[list-item] + {"type": "web_search"}, # type: ignore[list-item] + ], + ) + + selected = select_toolbox_tools(toolbox, include_types=["mcp", 
"code_interpreter"]) + + assert len(selected) == 2 + assert selected[0]["type"] == "mcp" + assert selected[1]["type"] == "code_interpreter" + + +async def test_fetched_toolbox_can_be_combined_with_function_tool() -> None: + from agent_framework import Agent, FunctionTool, tool + + project_client = MagicMock() + version_obj = _make_version_object(name="research_tools", version="v1", tools=[_make_code_interpreter()]) + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + + client = _make_mock_foundry_client(project_client=project_client) + toolbox = await client.get_toolbox("research_tools", version="v1") + + @tool(name="local_lookup", description="A local helper tool") + def local_lookup(query: str) -> str: + return query + + agent = Agent( + client=client, + instructions="You are a test agent.", + tools=[toolbox, local_lookup], + ) + + agent_tools = agent.default_options["tools"] + assert len(agent_tools) == 2 + assert agent_tools[0]["type"] == "code_interpreter" + assert isinstance(agent_tools[1], FunctionTool) + assert agent_tools[1].name == "local_lookup" + + +def test_select_toolbox_tools_supports_excludes_and_predicate() -> None: + from agent_framework_foundry import select_toolbox_tools + + tools: list[Tool | dict[str, Any]] = [ + {"type": "mcp", "server_label": "githubmcp"}, + {"type": "mcp", "server_label": "learnmcp"}, + {"type": "web_search"}, + ] + + selected = select_toolbox_tools( + tools, + exclude_names=["learnmcp"], + predicate=lambda tool: tool.get("type") == "mcp", # type: ignore[union-attr] + ) + + assert len(selected) == 1 + assert selected[0]["server_label"] == "githubmcp" + + +async def test_selected_toolbox_subset_can_be_combined_with_function_tool() -> None: + from agent_framework import Agent, FunctionTool, tool + + from agent_framework_foundry import select_toolbox_tools + + project_client = MagicMock() + version_obj = _make_version_object( + name="research_tools", + version="v1", + tools=[ + {"type": "mcp", 
"server_label": "githubmcp"}, # type: ignore[list-item] + {"type": "code_interpreter", "name": "python_runner"}, # type: ignore[list-item] + {"type": "web_search"}, # type: ignore[list-item] + ], + ) + project_client.beta.toolboxes.get_version = AsyncMock(return_value=version_obj) + + client = _make_mock_foundry_client(project_client=project_client) + toolbox = await client.get_toolbox("research_tools", version="v1") + selected_tools = select_toolbox_tools(toolbox, include_types=["mcp", "code_interpreter"]) + + @tool(name="local_lookup", description="A local helper tool") + def local_lookup(query: str) -> str: + return query + + agent = Agent( + client=client, + instructions="You are a test agent.", + tools=[selected_tools, local_lookup], + ) + + agent_tools = agent.default_options["tools"] + assert len(agent_tools) == 3 + assert agent_tools[0]["type"] == "mcp" + assert agent_tools[1]["type"] == "code_interpreter" + assert isinstance(agent_tools[2], FunctionTool) + assert agent_tools[2].name == "local_lookup" + + +# --------------------------------------------------------------------------- # +# Integration # +# --------------------------------------------------------------------------- # + + +skip_if_foundry_integration_tests_disabled = pytest.mark.skipif( + os.getenv("FOUNDRY_PROJECT_ENDPOINT", "") in ("", "https://test-project.services.ai.azure.com/") + or os.getenv("FOUNDRY_MODEL", "") == "", + reason="No real FOUNDRY_PROJECT_ENDPOINT or FOUNDRY_MODEL provided; skipping integration tests.", +) + + +@pytest.mark.flaky +@pytest.mark.integration +@skip_if_foundry_integration_tests_disabled +async def test_integration_get_toolbox_round_trip_against_real_project() -> None: + """Create a toolbox via the raw SDK, fetch via FoundryChatClient, then delete. + + Self-contained to avoid depending on toolboxes that may be cleaned up + externally. Exercises both the default-version resolution path + (``get`` + ``get_version``) and the explicit-version path. 
+ """ + from uuid import uuid4 + + from agent_framework import Agent + + from agent_framework_foundry import FoundryChatClient + + client = FoundryChatClient(credential=AzureCliCredential()) + project_client = client.project_client + + toolbox_name = f"af-int-toolbox-{uuid4().hex[:12]}" + created = await project_client.beta.toolboxes.create_version( + name=toolbox_name, + tools=[CodeInterpreterTool()], + description=f"{toolbox_name} integration test", + ) + assert isinstance(created, ToolboxVersionObject) + try: + toolbox_default = await client.get_toolbox(toolbox_name) + assert toolbox_default.name == toolbox_name + assert toolbox_default.tools, "Default-version fetch returned no tools" + + toolbox_pinned = await client.get_toolbox(toolbox_name, version=created.version) + assert toolbox_pinned.version == created.version + assert toolbox_pinned.tools + + agent = Agent( + client=client, + instructions="You are a test agent.", + tools=toolbox_pinned.tools, + ) + assert len(agent.default_options["tools"]) == len(toolbox_pinned.tools) + finally: + await project_client.beta.toolboxes.delete(toolbox_name) diff --git a/python/packages/hyperlight/LICENSE b/python/packages/hyperlight/LICENSE new file mode 100644 index 0000000000..9e841e7a26 --- /dev/null +++ b/python/packages/hyperlight/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/python/packages/hyperlight/README.md b/python/packages/hyperlight/README.md new file mode 100644 index 0000000000..1b1bc1e0ce --- /dev/null +++ b/python/packages/hyperlight/README.md @@ -0,0 +1,132 @@ +# agent-framework-hyperlight + +Alpha Hyperlight-backed CodeAct integrations for Microsoft Agent Framework. + +## Installation + +```bash +pip install agent-framework-hyperlight --pre +``` + +This package depends on `hyperlight-sandbox`, the packaged Python guest, and the +Wasm backend package on supported platforms. If the backend is not published for +your current platform yet, `execute_code` will fail at runtime when it tries to +create the sandbox. + +## Quick start + +### Context provider (recommended) + +Use `HyperlightCodeActProvider` to automatically inject the `execute_code` tool +and CodeAct instructions into every agent run. Tools registered on the provider +are available inside the sandbox via `call_tool(...)` but are **not** exposed as +direct agent tools. 
+ +```python +from agent_framework import Agent, tool +from agent_framework_hyperlight import HyperlightCodeActProvider + +@tool +def compute(operation: str, a: float, b: float) -> float: + """Perform a math operation.""" + ops = {"add": a + b, "subtract": a - b, "multiply": a * b, "divide": a / b} + return ops[operation] + +codeact = HyperlightCodeActProvider( + tools=[compute], + approval_mode="never_require", +) + +agent = Agent( + client=client, + name="CodeActAgent", + instructions="You are a helpful assistant.", + context_providers=[codeact], +) + +result = await agent.run("Multiply 6 by 7 using execute_code.") +``` + +### Standalone tool + +Use `HyperlightExecuteCodeTool` directly when you want full control over how the +tool is added to the agent. This is useful when mixing sandbox tools with +direct-only tools on the same agent. + +```python +from agent_framework import Agent, tool +from agent_framework_hyperlight import HyperlightExecuteCodeTool + +@tool +def send_email(to: str, subject: str, body: str) -> str: + """Send an email (direct-only, not available inside the sandbox).""" + return f"Email sent to {to}" + +execute_code = HyperlightExecuteCodeTool( + tools=[compute], + approval_mode="never_require", +) + +agent = Agent( + client=client, + name="MixedToolsAgent", + instructions="You are a helpful assistant.", + tools=[send_email, execute_code], +) +``` + +### Manual static wiring + +For fixed configurations where provider lifecycle overhead is unnecessary, build +the CodeAct instructions once and pass them to the agent at construction time: + +```python +execute_code = HyperlightExecuteCodeTool( + tools=[compute], + approval_mode="never_require", +) + +codeact_instructions = execute_code.build_instructions(tools_visible_to_model=False) + +agent = Agent( + client=client, + name="StaticWiringAgent", + instructions=f"You are a helpful assistant.\n\n{codeact_instructions}", + tools=[execute_code], +) +``` + +### File mounts and network access + +Mount 
host directories into the sandbox and allow outbound HTTP to specific +domains: + +```python +from agent_framework_hyperlight import HyperlightCodeActProvider, FileMount + +codeact = HyperlightCodeActProvider( + tools=[compute], + file_mounts=[ + "/host/data", # shorthand — same path in sandbox + ("/host/models", "/sandbox/models"), # explicit host → sandbox mapping + FileMount("/host/config", "/sandbox/config"), # named tuple + ], + allowed_domains=[ + "api.github.com", # all methods + ("internal.api.example.com", "GET"), # GET only + ], +) +``` + +## Notes + +- This package is intentionally separate from `agent-framework-core` so CodeAct + usage and installation remain optional. +- Alpha-package samples live under `packages/hyperlight/samples/`. +- `file_mounts` accepts a single string shorthand, an explicit `(host_path, + mount_path)` pair, or a `FileMount` named tuple. The host-side path in the + explicit forms may be a `str` or `Path`. Use the explicit two-value form when + the host path differs from the sandbox path. +- `allowed_domains` accepts a single string target such as `"github.com"` to + allow all backend-supported methods, an explicit `(target, method_or_methods)` + tuple such as `("github.com", "GET")`, or an `AllowedDomain` named tuple. diff --git a/python/packages/hyperlight/agent_framework_hyperlight/__init__.py b/python/packages/hyperlight/agent_framework_hyperlight/__init__.py new file mode 100644 index 0000000000..511252d0df --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +import importlib.metadata + +from ._execute_code_tool import HyperlightExecuteCodeTool +from ._provider import HyperlightCodeActProvider +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput + +try: + __version__ = importlib.metadata.version(__name__) +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + +__all__ = [ + "AllowedDomain", + "AllowedDomainInput", + "FileMount", + "FileMountInput", + "HyperlightCodeActProvider", + "HyperlightExecuteCodeTool", + "__version__", +] diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py b/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py new file mode 100644 index 0000000000..a46707ac0d --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py @@ -0,0 +1,865 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import ast +import asyncio +import copy +import mimetypes +import shutil +import threading +import time +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from tempfile import TemporaryDirectory +from typing import Annotated, Any, Protocol, TypeGuard, cast +from urllib.parse import urlparse + +from agent_framework import Content, FunctionTool +from agent_framework._tools import ApprovalMode, normalize_tools +from pydantic import BaseModel, Field + +from ._instructions import build_codeact_instructions, build_execute_code_description +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountHostPath, FileMountInput + +DEFAULT_HYPERLIGHT_BACKEND = "wasm" +DEFAULT_HYPERLIGHT_MODULE = "python_guest.path" +EXECUTE_CODE_INPUT_DESCRIPTION = "Python code to execute in an isolated Hyperlight sandbox." 
+OUTPUT_FILE_RETRY_ATTEMPTS = 10 +OUTPUT_FILE_RETRY_DELAY_SECONDS = 0.1 + + +class _ExecuteCodeInput(BaseModel): + code: Annotated[str, Field(description=EXECUTE_CODE_INPUT_DESCRIPTION)] + + +@dataclass(frozen=True, slots=True) +class _StoredFileMount: + host_path: Path + mount_path: str + + +@dataclass(frozen=True, slots=True) +class _NormalizedFileMount: + host_path: Path + mount_path: str + path_signature: tuple[tuple[str, int, int], ...] + + +@dataclass(frozen=True, slots=True) +class _RunConfig: + backend: str + module: str | None + module_path: str | None + approval_mode: ApprovalMode + tools: tuple[FunctionTool, ...] + workspace_root: Path | None + workspace_signature: tuple[tuple[str, int, int], ...] + file_mounts: tuple[_NormalizedFileMount, ...] + allowed_domains: tuple[AllowedDomain, ...] + + @property + def mounted_paths(self) -> tuple[str, ...]: + return tuple(_display_mount_path(mount.mount_path) for mount in self.file_mounts) + + @property + def filesystem_enabled(self) -> bool: + return self.workspace_root is not None or bool(self.file_mounts) + + def cache_key(self) -> tuple[Any, ...]: + return ( + self.backend, + self.module, + self.module_path, + self.approval_mode, + tuple((tool_obj.name, id(tool_obj)) for tool_obj in self.tools), + str(self.workspace_root) if self.workspace_root is not None else None, + self.workspace_signature, + tuple((mount.mount_path, str(mount.host_path), mount.path_signature) for mount in self.file_mounts), + tuple((allowed_domain.target, allowed_domain.methods) for allowed_domain in self.allowed_domains), + ) + + +class SandboxRuntime(Protocol): + def execute(self, *, config: _RunConfig, code: str) -> list[Content]: ... 
+ + +@dataclass +class _SandboxEntry: + sandbox: Any + snapshot: Any + input_dir: TemporaryDirectory[str] | None + output_dir: TemporaryDirectory[str] | None + lock: threading.RLock + + +def _load_sandbox_class() -> type[Any]: + try: + from hyperlight_sandbox import Sandbox + except ModuleNotFoundError as exc: + raise ModuleNotFoundError( + "Hyperlight support requires `hyperlight-sandbox`, `hyperlight-sandbox-python-guest`, " + "and a compatible backend package such as `hyperlight-sandbox-backend-wasm`." + ) from exc + + return Sandbox + + +def _passthrough_result_parser(result: Any) -> str: + return repr(result) + + +def _collect_tools(*tool_groups: Any) -> list[FunctionTool]: + tools_by_name: dict[str, FunctionTool] = {} + + for tool_group in tool_groups: + normalized_group = normalize_tools(tool_group) + for tool_obj in normalized_group: + if not isinstance(tool_obj, FunctionTool): + continue + if tool_obj.name == "execute_code": + continue + tools_by_name.pop(tool_obj.name, None) + tools_by_name[tool_obj.name] = tool_obj + + return list(tools_by_name.values()) + + +def _resolve_execute_code_approval_mode( + *, + base_approval_mode: ApprovalMode, + tools: Sequence[FunctionTool], +) -> ApprovalMode: + if base_approval_mode == "always_require": + return "always_require" + + if any(tool_obj.approval_mode == "always_require" for tool_obj in tools): + return "always_require" + + return "never_require" + + +def _resolve_existing_path(value: str | Path) -> Path: + return Path(value).expanduser().resolve(strict=True) + + +def _resolve_workspace_root(value: str | Path | None) -> Path | None: + if value is None: + return None + + resolved_path = _resolve_existing_path(value) + if not resolved_path.is_dir(): + raise ValueError("workspace_root must point to an existing directory.") + return resolved_path + + +def _is_file_mount_pair(value: Any) -> TypeGuard[FileMount | tuple[FileMountHostPath, str]]: + if not isinstance(value, tuple): + return False + + value_tuple = 
cast(tuple[object, ...], value) + if len(value_tuple) != 2: + return False + + host_path, mount_path = value_tuple + return isinstance(host_path, (str, Path)) and isinstance(mount_path, str) + + +def _normalize_file_mount_input(file_mount: FileMountInput) -> _StoredFileMount: + host_path: FileMountHostPath + mount_path: str + if isinstance(file_mount, str): + host_path = file_mount + mount_path = file_mount + else: + host_path = file_mount[0] + mount_path = file_mount[1] + + return _StoredFileMount( + host_path=_resolve_existing_path(host_path), + mount_path=_normalize_mount_path(mount_path), + ) + + +def _normalize_domain(target: str) -> str: + candidate = target.strip() + if not candidate: + raise ValueError("Allowed domain entries must not be empty.") + + parsed = urlparse(candidate if "://" in candidate else f"//{candidate}") + normalized = (parsed.netloc or parsed.path).strip().rstrip("/") + if not normalized: + raise ValueError(f"Could not normalize allowed domain entry: {target!r}.") + return normalized.lower() + + +def _normalize_http_method(method: str) -> str: + normalized = method.strip().upper() + if not normalized: + raise ValueError("HTTP method entries must not be empty.") + return normalized + + +def _normalize_http_methods(methods: str | Sequence[str] | None) -> tuple[str, ...] 
| None: + if methods is None: + return None + + normalized_methods = ( + {_normalize_http_method(methods)} + if isinstance(methods, str) + else {_normalize_http_method(method) for method in methods} + ) + if not normalized_methods: + raise ValueError("Allowed domain methods must not be empty when provided.") + return tuple(sorted(normalized_methods)) + + +def _is_allowed_domain_pair(value: Any) -> TypeGuard[tuple[str, str | Sequence[str]]]: + if not isinstance(value, tuple) or isinstance(value, AllowedDomain): + return False + + value_tuple = cast(tuple[object, ...], value) + if len(value_tuple) != 2: + return False + + target, methods = value_tuple + if not isinstance(target, str): + return False + if isinstance(methods, str): + return True + return isinstance(methods, Sequence) + + +def _normalize_allowed_domain_input(allowed_domain: AllowedDomainInput) -> AllowedDomain: + if isinstance(allowed_domain, str): + return AllowedDomain(target=_normalize_domain(allowed_domain), methods=None) + + if isinstance(allowed_domain, AllowedDomain): + return AllowedDomain( + target=_normalize_domain(allowed_domain.target), + methods=_normalize_http_methods(allowed_domain.methods), + ) + + target, methods = allowed_domain + return AllowedDomain( + target=_normalize_domain(target), + methods=_normalize_http_methods(methods), + ) + + +def _allowed_domain_registration_targets(*, target: str, expand_missing_scheme: bool) -> tuple[str, ...]: + if not expand_missing_scheme or "://" in target: + return (target,) + return (f"http://{target}", f"https://{target}") + + +def _should_retry_allowed_domain_registration( + *, + error: RuntimeError, + allowed_domains: Sequence[AllowedDomain], +) -> bool: + message = str(error).lower() + return "invalid url for network permission" in message and any( + "://" not in domain.target for domain in allowed_domains + ) + + +def _normalize_mount_path(mount_path: str) -> str: + raw_path = mount_path.strip().replace("\\", "/") + if not raw_path: + raise 
ValueError("mount_path must not be empty.") + + pure_path = PurePosixPath(raw_path) + parts = [part for part in pure_path.parts if part not in {"", "/", "."}] + if parts and parts[0] == "input": + parts = parts[1:] + if any(part == ".." for part in parts): + raise ValueError("mount_path must stay within /input.") + if not parts: + raise ValueError("mount_path must point to a concrete path under /input.") + return "/".join(parts) + + +def _display_mount_path(mount_path: str) -> str: + return f"/input/{mount_path}" + + +def _path_tree_signature(path: Path) -> tuple[tuple[str, int, int], ...]: + if path.is_file(): + stat = path.stat() + return ((path.name, int(stat.st_size), int(stat.st_mtime_ns)),) + + entries: list[tuple[str, int, int]] = [] + for candidate in sorted(path.rglob("*"), key=lambda value: value.as_posix()): + try: + stat = candidate.stat() + except FileNotFoundError: + continue + relative_path = candidate.relative_to(path).as_posix() + size = int(stat.st_size) if candidate.is_file() else 0 + entries.append((relative_path, size, int(stat.st_mtime_ns))) + return tuple(entries) + + +def _copy_path(source: Path, destination: Path) -> None: + if source.is_dir(): + destination.mkdir(parents=True, exist_ok=True) + for child in sorted(source.iterdir(), key=lambda value: value.name): + _copy_path(child, destination / child.name) + return + + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, destination) + + +def _populate_input_dir(*, config: _RunConfig, input_root: Path) -> None: + if config.workspace_root is not None: + for child in sorted(config.workspace_root.iterdir(), key=lambda value: value.name): + _copy_path(child, input_root / child.name) + + for mount in config.file_mounts: + _copy_path(mount.host_path, input_root / mount.mount_path) + + +def _create_file_content(file_path: Path, *, relative_path: str) -> Content: + media_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream" + return Content.from_data( 
+ data=file_path.read_bytes(), + media_type=media_type, + additional_properties={"path": f"/output/{relative_path}"}, + ) + + +def _normalize_output_relative_path(*, output_file: object, root: Path) -> str | None: + candidate_path = Path(str(output_file)) + if candidate_path.is_absolute(): + try: + return candidate_path.relative_to(root).as_posix() + except ValueError: + return None + + raw_path = str(output_file).replace("\\", "/") + pure_path = PurePosixPath(raw_path) + parts = [part for part in pure_path.parts if part not in {"", "/", "."}] + if parts and parts[0] == "output": + parts = parts[1:] + if not parts or any(part == ".." for part in parts): + return None + return "/".join(parts) + + +def _collect_output_relative_paths(*, sandbox: Any, root: Path) -> set[str]: + relative_paths: set[str] = set() + + if hasattr(sandbox, "get_output_files"): + try: + output_files = cast(Sequence[object], sandbox.get_output_files()) + except Exception: + output_files = () + + for output_file in output_files: + if (relative_path := _normalize_output_relative_path(output_file=output_file, root=root)) is not None: + relative_paths.add(relative_path) + + for host_path in root.rglob("*"): + if host_path.is_file(): + relative_paths.add(host_path.relative_to(root).as_posix()) + + return relative_paths + + +def _parse_output_files( + *, + sandbox: Any, + output_dir: TemporaryDirectory[str] | None, + expect_output_files: bool, +) -> list[Content]: + if output_dir is None: + return [] + + root = Path(output_dir.name) + + for attempt in range(OUTPUT_FILE_RETRY_ATTEMPTS): + relative_paths = _collect_output_relative_paths(sandbox=sandbox, root=root) + missing_files = expect_output_files and not relative_paths + contents: list[Content] = [] + + for relative_path in sorted(relative_paths): + host_path = root.joinpath(*PurePosixPath(relative_path).parts) + if not host_path.is_file(): + missing_files = True + continue + try: + contents.append(_create_file_content(host_path, 
def _build_execution_contents(
    *,
    result: Any,
    sandbox: Any,
    output_dir: TemporaryDirectory[str] | None,
    code: str,
) -> list[Content]:
    """Translate a sandbox run result into the contents returned by the tool.

    The contents are ordered as: stdout text (when non-empty), any output
    files, then either stderr text on success or an error content on failure.
    A successful run that produced nothing yields a single placeholder text.
    Output files are only treated as expected when ``code`` mentions
    ``/output``.
    """

    def _read_stream(attribute: str) -> str | None:
        # Missing or empty streams become None; CRLF is normalized to LF.
        text = str(getattr(result, attribute, "") or "").replace("\r\n", "\n")
        return text if text else None

    succeeded = bool(getattr(result, "success", False))
    stdout_text = _read_stream("stdout")
    stderr_text = _read_stream("stderr")

    contents: list[Content] = []
    if stdout_text is not None:
        contents.append(Content.from_text(stdout_text, raw_representation=result))

    file_contents = _parse_output_files(
        sandbox=sandbox,
        output_dir=output_dir,
        expect_output_files="/output" in code,
    )
    contents.extend(file_contents)

    if not succeeded:
        contents.append(
            Content.from_error(
                message="Execution error",
                error_details=stderr_text or "Unknown sandbox error",
                raw_representation=result,
            )
        )
        return contents

    if stderr_text is not None:
        contents.append(Content.from_text(stderr_text, raw_representation=result))
    if not contents:
        contents.append(Content.from_text("Code executed successfully without output."))
    return contents
The real Hyperlight backend invokes + # registered callbacks synchronously via FFI, so this must be a sync function. + # We run the async call on a dedicated thread to avoid conflicts with any + # event loop that may be running on the current thread. + result_box: list[Any] = [None] + error_box: list[BaseException] = [] + + def _run() -> None: + try: + result_box[0] = asyncio.run(_invoke()) + except BaseException as exc: + error_box.append(exc) + + worker = threading.Thread(target=_run) + worker.start() + worker.join() + if error_box: + raise error_box[0] + contents: list[Content] = result_box[0] + + values: list[Any] = [] + for content in contents: + if content.type == "text" and content.text is not None: + try: + values.append(ast.literal_eval(content.text)) + except (SyntaxError, ValueError): + values.append(content.text) + continue + + values.append(content.to_dict()) + + if len(values) == 1: + return values[0] + return values + + return _callback + + +def _clear_directory(output_dir: TemporaryDirectory[str] | None) -> None: + """Remove all contents of the output directory without deleting the directory itself.""" + if output_dir is None: + return + root = Path(output_dir.name) + for child in root.iterdir(): + try: + if child.is_symlink() or child.is_file(): + child.unlink() + elif child.is_dir(): + shutil.rmtree(child, ignore_errors=True) + except (FileNotFoundError, PermissionError): + pass + + +class _SandboxRegistry: + def __init__(self) -> None: + self._entries: dict[tuple[Any, ...], _SandboxEntry] = {} + self._entries_lock = threading.RLock() + + def execute(self, *, config: _RunConfig, code: str) -> list[Content]: + """Execute code in a cached sandbox matching the given config. + + Entries are keyed by ``config.cache_key()``. Concurrent calls with the same + key are serialized by the entry lock so they never race, but they share the + same sandbox instance. 
For true parallel execution, use distinct provider + instances or configs that produce different cache keys. + """ + cache_key = config.cache_key() + with self._entries_lock: + entry = self._entries.get(cache_key) + if entry is None: + entry = self._create_entry(config) + self._entries[cache_key] = entry + + with entry.lock: + entry.sandbox.restore(entry.snapshot) + _clear_directory(entry.output_dir) + result = entry.sandbox.run(code=code) + return _build_execution_contents( + result=result, + sandbox=entry.sandbox, + output_dir=entry.output_dir, + code=code, + ) + + def _create_entry(self, config: _RunConfig) -> _SandboxEntry: + input_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None + output_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None + + if input_dir_handle is not None: + _populate_input_dir(config=config, input_root=Path(input_dir_handle.name)) + + sandbox_cls = _load_sandbox_class() + + def _create_sandbox() -> Any: + try: + return sandbox_cls( + backend=config.backend, + module=config.module, + module_path=config.module_path, + input_dir=input_dir_handle.name if input_dir_handle is not None else None, + output_dir=output_dir_handle.name if output_dir_handle is not None else None, + ) + except ImportError as exc: + raise RuntimeError( + "The selected Hyperlight backend is not installed or not supported on this platform. " + "Install a compatible backend package, such as `hyperlight-sandbox-backend-wasm`." 
+ ) from exc + + def _configure_sandbox(*, sandbox: Any, expand_missing_scheme: bool) -> None: + for tool_obj in config.tools: + sandbox.register_tool(tool_obj.name, _make_sandbox_callback(tool_obj)) + + for allowed_domain in config.allowed_domains: + for target in _allowed_domain_registration_targets( + target=allowed_domain.target, + expand_missing_scheme=expand_missing_scheme, + ): + sandbox.allow_domain( + target, + methods=list(allowed_domain.methods) if allowed_domain.methods is not None else None, + ) + + sandbox = _create_sandbox() + _configure_sandbox(sandbox=sandbox, expand_missing_scheme=False) + + try: + sandbox.run("None") + except RuntimeError as exc: + if not _should_retry_allowed_domain_registration(error=exc, allowed_domains=config.allowed_domains): + raise + + sandbox = _create_sandbox() + _configure_sandbox(sandbox=sandbox, expand_missing_scheme=True) + sandbox.run("None") + + snapshot = sandbox.snapshot() + return _SandboxEntry( + sandbox=sandbox, + snapshot=snapshot, + input_dir=input_dir_handle, + output_dir=output_dir_handle, + lock=threading.RLock(), + ) + + +class HyperlightExecuteCodeTool(FunctionTool): + """Execute Python code inside a Hyperlight sandbox.""" + + def __init__( + self, + *, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None, + approval_mode: ApprovalMode | None = None, + workspace_root: str | Path | None = None, + file_mounts: FileMountInput | Sequence[FileMountInput] | None = None, + allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None, + backend: str = DEFAULT_HYPERLIGHT_BACKEND, + module: str | None = DEFAULT_HYPERLIGHT_MODULE, + module_path: str | None = None, + _registry: SandboxRuntime | None = None, + ) -> None: + super().__init__( + name="execute_code", + description=EXECUTE_CODE_INPUT_DESCRIPTION, + approval_mode="never_require", + func=self._run_code, + input_model=_ExecuteCodeInput, + ) + self._state_lock = threading.RLock() + 
self._registry = _registry or _SandboxRegistry() + self._default_approval_mode: ApprovalMode = approval_mode or "never_require" + self._workspace_root = _resolve_workspace_root(workspace_root) + self._backend: str = backend + self._module: str | None = module + self._module_path: str | None = module_path + self._managed_tools: list[FunctionTool] = [] + self._file_mounts: dict[str, _StoredFileMount] = {} + self._allowed_domains: dict[str, AllowedDomain] = {} + + if tools is not None: + self.add_tools(tools) + if file_mounts is not None: + self.add_file_mounts(file_mounts) + if allowed_domains is not None: + self.add_allowed_domains(allowed_domains) + + self._refresh_approval_mode() + + @property + def description(self) -> str: + state_lock = getattr(self, "_state_lock", None) + if state_lock is None: + return str(self.__dict__.get("description", EXECUTE_CODE_INPUT_DESCRIPTION)) + + with state_lock: + allowed_domains = sorted(self._allowed_domains.values(), key=lambda value: value.target) + return build_execute_code_description( + tools=self._managed_tools, + filesystem_enabled=self._workspace_root is not None or bool(self._file_mounts), + workspace_enabled=self._workspace_root is not None, + mounted_paths=[_display_mount_path(mount.mount_path) for mount in self._file_mounts.values()], + allowed_domains=allowed_domains, + ) + + @description.setter + def description(self, value: str) -> None: + self.__dict__["description"] = value + + def add_tools( + self, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]], + ) -> None: + """Add sandbox-managed tools to this execute_code surface.""" + with self._state_lock: + combined_tools = _collect_tools(self._managed_tools, tools) + self._managed_tools = combined_tools + self._refresh_approval_mode() + + def get_tools(self) -> list[FunctionTool]: + """Return the currently managed sandbox tools.""" + with self._state_lock: + return list(self._managed_tools) + + def remove_tool(self, name: str) 
-> None: + """Remove one managed sandbox tool by name.""" + with self._state_lock: + remaining_tools = [tool_obj for tool_obj in self._managed_tools if tool_obj.name != name] + if len(remaining_tools) == len(self._managed_tools): + raise KeyError(f"No managed tool named {name!r} is registered.") + self._managed_tools = remaining_tools + self._refresh_approval_mode() + + def clear_tools(self) -> None: + """Remove all managed sandbox tools.""" + with self._state_lock: + self._managed_tools = [] + self._refresh_approval_mode() + + def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None: + """Add one or more file mounts under `/input`. + + A single string uses the same relative path on the host and in the sandbox. + Use a two-string tuple or `FileMount` when those paths differ. + """ + if isinstance(file_mounts, str) or _is_file_mount_pair(file_mounts): + normalized_mounts = [_normalize_file_mount_input(file_mounts)] + else: + normalized_mounts = [ + _normalize_file_mount_input(mount) for mount in cast(Sequence[FileMountInput], file_mounts) + ] + + with self._state_lock: + for mount in normalized_mounts: + self._file_mounts[mount.mount_path] = mount + + def get_file_mounts(self) -> list[FileMount]: + """Return the configured file mounts.""" + with self._state_lock: + return [ + FileMount(host_path=mount.host_path, mount_path=_display_mount_path(mount.mount_path)) + for mount in self._file_mounts.values() + ] + + def remove_file_mount(self, mount_path: str) -> None: + """Remove one file mount by its sandbox path.""" + normalized_mount_path = _normalize_mount_path(mount_path) + with self._state_lock: + if normalized_mount_path not in self._file_mounts: + raise KeyError(f"No file mount exists for {mount_path!r}.") + del self._file_mounts[normalized_mount_path] + + def clear_file_mounts(self) -> None: + """Remove all configured file mounts.""" + with self._state_lock: + self._file_mounts.clear() + + def add_allowed_domains(self, domains: 
AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: + """Add one or more outbound allow-list entries.""" + if isinstance(domains, (str, AllowedDomain)) or _is_allowed_domain_pair(domains): + normalized_domains = [_normalize_allowed_domain_input(domains)] + else: + normalized_domains = [ + _normalize_allowed_domain_input(domain) for domain in cast(Sequence[AllowedDomainInput], domains) + ] + + with self._state_lock: + for normalized_domain in normalized_domains: + self._allowed_domains[normalized_domain.target] = normalized_domain + + def get_allowed_domains(self) -> list[AllowedDomain]: + """Return the configured outbound allow-list entries.""" + with self._state_lock: + return sorted(self._allowed_domains.values(), key=lambda value: value.target) + + def remove_allowed_domain(self, domain: str) -> None: + """Remove one outbound allow-list entry.""" + normalized_domain = _normalize_domain(domain) + with self._state_lock: + if normalized_domain not in self._allowed_domains: + raise KeyError(f"No allowed domain exists for {domain!r}.") + del self._allowed_domains[normalized_domain] + + def clear_allowed_domains(self) -> None: + """Remove all outbound allow-list entries.""" + with self._state_lock: + self._allowed_domains.clear() + + def build_instructions(self, *, tools_visible_to_model: bool) -> str: + """Build the current CodeAct instructions for this execute_code surface.""" + config = self._build_run_config() + return build_codeact_instructions( + tools=config.tools, + tools_visible_to_model=tools_visible_to_model, + filesystem_enabled=config.filesystem_enabled, + ) + + def create_run_tool(self) -> HyperlightExecuteCodeTool: + """Create a run-scoped snapshot of this execute_code surface.""" + file_mounts = self.get_file_mounts() + allowed_domains = self.get_allowed_domains() + + return HyperlightExecuteCodeTool( + tools=self.get_tools(), + approval_mode=self._default_approval_mode, + workspace_root=self._workspace_root, + file_mounts=file_mounts or None, 
+ allowed_domains=allowed_domains or None, + backend=self._backend, + module=self._module, + module_path=self._module_path, + _registry=self._registry, + ) + + def build_serializable_state(self) -> dict[str, Any]: + """Return a JSON-serializable snapshot of the effective run state.""" + config = self._build_run_config() + return { + "backend": config.backend, + "module": config.module, + "module_path": config.module_path, + "approval_mode": config.approval_mode, + "tool_names": [tool_obj.name for tool_obj in config.tools], + "filesystem_enabled": config.filesystem_enabled, + "workspace_root": str(config.workspace_root) if config.workspace_root is not None else None, + "file_mounts": [ + { + "host_path": str(mount.host_path), + "mount_path": _display_mount_path(mount.mount_path), + } + for mount in config.file_mounts + ], + "network_enabled": bool(config.allowed_domains), + "allowed_domains": [ + { + "target": allowed_domain.target, + "methods": list(allowed_domain.methods) if allowed_domain.methods is not None else None, + } + for allowed_domain in config.allowed_domains + ], + } + + def to_dict(self, *, exclude: set[str] | None = None, exclude_none: bool = True) -> dict[str, Any]: + self.__dict__["description"] = self.description + return super().to_dict(exclude=exclude, exclude_none=exclude_none) + + def _refresh_approval_mode(self) -> None: + self.approval_mode = _resolve_execute_code_approval_mode( + base_approval_mode=self._default_approval_mode, + tools=self._managed_tools, + ) + + def _build_run_config(self) -> _RunConfig: + with self._state_lock: + managed_tools = tuple(self._managed_tools) + workspace_root = self._workspace_root + stored_mounts = tuple(self._file_mounts.values()) + allowed_domains = tuple(sorted(self._allowed_domains.values(), key=lambda value: value.target)) + approval_mode = _resolve_execute_code_approval_mode( + base_approval_mode=self._default_approval_mode, + tools=managed_tools, + ) + + workspace_signature = 
_path_tree_signature(workspace_root) if workspace_root is not None else () + normalized_mounts = tuple( + _NormalizedFileMount( + host_path=mount.host_path, + mount_path=mount.mount_path, + path_signature=_path_tree_signature(mount.host_path), + ) + for mount in stored_mounts + ) + + return _RunConfig( + backend=self._backend, + module=self._module, + module_path=self._module_path, + approval_mode=approval_mode, + tools=managed_tools, + workspace_root=workspace_root, + workspace_signature=workspace_signature, + file_mounts=normalized_mounts, + allowed_domains=allowed_domains, + ) + + async def _run_code(self, *, code: str) -> list[Content]: + config = self._build_run_config() + return await asyncio.to_thread(self._registry.execute, config=config, code=code) diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py b/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py new file mode 100644 index 0000000000..c44a183062 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py @@ -0,0 +1,139 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +from collections.abc import Sequence + +from agent_framework import FunctionTool + +from ._types import AllowedDomain + + +def _format_tool_summaries(tools: Sequence[FunctionTool]) -> str: + if not tools: + return "- No tools are currently registered inside the sandbox." + + lines: list[str] = [] + for tool_obj in tools: + parameters = tool_obj.parameters().get("properties", {}) + parameter_names = [name for name in parameters if isinstance(name, str)] + parameter_summary = ", ".join(parameter_names) if parameter_names else "none" + description = str(tool_obj.description or "").strip() or "No description provided." 
+ lines.append(f"- `{tool_obj.name}`: {description} Parameters: {parameter_summary}.") + return "\n".join(lines) + + +def _format_filesystem_capabilities( + *, + filesystem_enabled: bool, + workspace_enabled: bool, + mounted_paths: Sequence[str], +) -> str: + if not filesystem_enabled: + return "Filesystem access is unavailable because no workspace root or file mounts are configured." + + lines = ["Filesystem access is enabled."] + lines.append("Read files from `/input`.") + lines.append("Write generated artifacts to `/output`; returned files will be attached to the tool result.") + + if workspace_enabled: + lines.append("The configured workspace root is available under `/input/`.") + + if mounted_paths: + lines.append("Additional mounted paths:") + lines.extend(f"- `{mounted_path}`" for mounted_path in mounted_paths) + elif not workspace_enabled: + lines.append("No workspace root or explicit file mounts are currently configured.") + + return "\n".join(lines) + + +def _format_network_capabilities( + *, + allowed_domains: Sequence[AllowedDomain], +) -> str: + if not allowed_domains: + return "Outbound network access is unavailable because no allow-listed targets are configured." + + lines = ["Outbound network access is allowed only for these configured targets:"] + for allowed_domain in allowed_domains: + methods_text = ( + ", ".join(allowed_domain.methods) if allowed_domain.methods else "all methods allowed by the backend" + ) + lines.append(f"- `{allowed_domain.target}`: {methods_text}.") + return "\n".join(lines) + + +def build_codeact_instructions( + *, + tools: Sequence[FunctionTool], + tools_visible_to_model: bool, + filesystem_enabled: bool = False, +) -> str: + """Build dynamic CodeAct instructions for the effective sandbox state.""" + usage_note = ( + "Some tools may also appear directly, but prefer `execute_code` whenever you need to combine Python " + "control flow with sandbox tool calls." 
+ if tools_visible_to_model + else "Provider-owned sandbox tools are not exposed separately; use `execute_code` when you need them." + ) + + output_note = ( + "To surface results from `execute_code`, end the code with `print(...)`; the sandbox does not " + "return the value of the last expression." + ) + if filesystem_enabled: + output_note += ( + " For larger artifacts, write them to `/output/` instead — returned files will be " + "attached to the tool result." + ) + + return f"""You have one primary tool: execute_code. + +Prefer one execute_code call per request when possible. +Its tool description contains the current `call_tool(...)` guidance, sandbox +tool registry, and capability limits. + +{output_note} + +{usage_note} +""" + + +def build_execute_code_description( + *, + tools: Sequence[FunctionTool], + filesystem_enabled: bool, + workspace_enabled: bool, + mounted_paths: Sequence[str], + allowed_domains: Sequence[AllowedDomain], +) -> str: + """Build the dynamic execute_code tool description for standalone usage.""" + filesystem_text = _format_filesystem_capabilities( + filesystem_enabled=filesystem_enabled, + workspace_enabled=workspace_enabled, + mounted_paths=mounted_paths, + ) + network_text = _format_network_capabilities( + allowed_domains=allowed_domains, + ) + + return f"""Execute Python in an isolated Hyperlight sandbox. + +Inside the sandbox, `call_tool(name, **kwargs)` is available as a built-in for +registered host callbacks. Use the tool name as the first argument and keyword +arguments only. Do not pass a dict or any other positional arguments after the +tool name. + +Registered sandbox tools: +{_format_tool_summaries(tools)} + +Filesystem capabilities: +{filesystem_text} + +Network capabilities: +{network_text} + +Prefer `execute_code` when you need to combine one or more `call_tool(...)` +calls with Python control flow, loops, or post-processing. 
+""" diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_provider.py b/python/packages/hyperlight/agent_framework_hyperlight/_provider.py new file mode 100644 index 0000000000..1232ecc262 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_provider.py @@ -0,0 +1,111 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +from collections.abc import Callable, Sequence +from pathlib import Path +from typing import Any + +from agent_framework import AgentSession, ContextProvider, FunctionTool, SessionContext +from agent_framework._tools import ApprovalMode + +from ._execute_code_tool import HyperlightExecuteCodeTool, SandboxRuntime +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput + + +class HyperlightCodeActProvider(ContextProvider): + """Inject a Hyperlight-backed CodeAct surface using provider-owned tools.""" + + DEFAULT_SOURCE_ID = "hyperlight_codeact" + + def __init__( + self, + source_id: str = DEFAULT_SOURCE_ID, + *, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None, + approval_mode: ApprovalMode | None = None, + workspace_root: str | Path | None = None, + file_mounts: FileMountInput | Sequence[FileMountInput] | None = None, + allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None, + backend: str = "wasm", + module: str | None = "python_guest.path", + module_path: str | None = None, + _registry: SandboxRuntime | None = None, + ) -> None: + super().__init__(source_id) + self._execute_code_tool = HyperlightExecuteCodeTool( + tools=tools, + approval_mode=approval_mode, + workspace_root=workspace_root, + file_mounts=file_mounts, + allowed_domains=allowed_domains, + backend=backend, + module=module, + module_path=module_path, + _registry=_registry, + ) + + def add_tools( + self, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]], + ) -> None: + 
"""Add provider-owned sandbox tools.""" + self._execute_code_tool.add_tools(tools) + + def get_tools(self) -> list[FunctionTool]: + """Return the provider-owned sandbox tools.""" + return self._execute_code_tool.get_tools() + + def remove_tool(self, name: str) -> None: + """Remove one provider-owned sandbox tool by name.""" + self._execute_code_tool.remove_tool(name) + + def clear_tools(self) -> None: + """Remove all provider-owned sandbox tools.""" + self._execute_code_tool.clear_tools() + + def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None: + """Add provider-managed file mounts.""" + self._execute_code_tool.add_file_mounts(file_mounts) + + def get_file_mounts(self) -> list[FileMount]: + """Return the provider-managed file mounts.""" + return self._execute_code_tool.get_file_mounts() + + def remove_file_mount(self, mount_path: str) -> None: + """Remove one provider-managed file mount.""" + self._execute_code_tool.remove_file_mount(mount_path) + + def clear_file_mounts(self) -> None: + """Remove all provider-managed file mounts.""" + self._execute_code_tool.clear_file_mounts() + + def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: + """Add provider-managed outbound allow-list entries.""" + self._execute_code_tool.add_allowed_domains(domains) + + def get_allowed_domains(self) -> list[AllowedDomain]: + """Return the provider-managed outbound allow-list entries.""" + return self._execute_code_tool.get_allowed_domains() + + def remove_allowed_domain(self, domain: str) -> None: + """Remove one provider-managed outbound allow-list entry.""" + self._execute_code_tool.remove_allowed_domain(domain) + + def clear_allowed_domains(self) -> None: + """Remove all provider-managed outbound allow-list entries.""" + self._execute_code_tool.clear_allowed_domains() + + async def before_run( + self, + *, + agent: Any, + session: AgentSession | None, + context: SessionContext, + state: dict[str, 
Any], + ) -> None: + """Inject CodeAct instructions and a run-scoped execute_code tool before each run.""" + run_tool = self._execute_code_tool.create_run_tool() + state[self.source_id] = run_tool.build_serializable_state() + context.extend_instructions(self.source_id, run_tool.build_instructions(tools_visible_to_model=False)) + context.extend_tools(self.source_id, [run_tool]) diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_types.py b/python/packages/hyperlight/agent_framework_hyperlight/_types.py new file mode 100644 index 0000000000..8d202c8986 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_types.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +from collections.abc import Sequence +from pathlib import Path +from typing import NamedTuple, TypeAlias + + +class FileMount(NamedTuple): + """Map a host file or directory into the sandbox input tree.""" + + host_path: str | Path + mount_path: str + + +FileMountHostPath: TypeAlias = str | Path +FileMountInput: TypeAlias = str | tuple[FileMountHostPath, str] | FileMount + + +class AllowedDomain(NamedTuple): + """Allow outbound requests to one target, optionally restricted to specific HTTP methods.""" + + target: str + methods: tuple[str, ...] 
| None = None + + +AllowedDomainInput: TypeAlias = str | tuple[str, str | Sequence[str]] | AllowedDomain diff --git a/python/packages/hyperlight/agent_framework_hyperlight/py.typed b/python/packages/hyperlight/agent_framework_hyperlight/py.typed new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/py.typed @@ -0,0 +1 @@ + diff --git a/python/packages/hyperlight/pyproject.toml b/python/packages/hyperlight/pyproject.toml new file mode 100644 index 0000000000..21034b1a8e --- /dev/null +++ b/python/packages/hyperlight/pyproject.toml @@ -0,0 +1,101 @@ +[project] +name = "agent-framework-hyperlight" +description = "Hyperlight CodeAct integrations for Microsoft Agent Framework." +authors = [{ name = "Microsoft", email = "af-support@microsoft.com"}] +readme = "README.md" +requires-python = ">=3.10" +version = "1.0.0a260409" +license-files = ["LICENSE"] +urls.homepage = "https://aka.ms/agent-framework" +urls.source = "https://github.com/microsoft/agent-framework/tree/main/python" +urls.release_notes = "https://github.com/microsoft/agent-framework/releases?q=tag%3Apython-1&expanded=true" +urls.issues = "https://github.com/microsoft/agent-framework/issues" +classifiers = [ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Typing :: Typed", +] +dependencies = [ + "agent-framework-core>=1.0.0,<2", + "hyperlight-sandbox>=0.3.0,<0.4", + "hyperlight-sandbox-backend-wasm>=0.3.0,<0.4 ; ((sys_platform == 'linux' and platform_machine == 'x86_64') or (sys_platform == 'win32' and platform_machine == 'AMD64')) and python_version < '3.14'", + "hyperlight-sandbox-python-guest>=0.3.0,<0.4", +] + +[tool.uv] +prerelease = 
"if-necessary-or-explicit" +environments = [ + "sys_platform == 'linux'", + "sys_platform == 'win32'" +] + +[tool.uv-dynamic-versioning] +fallback-version = "0.0.0" + +[tool.pytest.ini_options] +testpaths = 'tests' +addopts = "-ra -q -r fEX" +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +filterwarnings = [] +timeout = 120 +markers = [ + "integration: marks tests as integration tests that require external services", +] + +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.per-file-ignores] +"samples/**" = ["INP", "T201"] +"tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"] + +[tool.coverage.run] +omit = [ + "**/__init__.py" +] + +[tool.pyright] +extends = "../../pyproject.toml" +include = ["agent_framework_hyperlight"] +exclude = ['tests'] + +[tool.mypy] +plugins = ['pydantic.mypy'] +strict = true +python_version = "3.10" +ignore_missing_imports = true +disallow_untyped_defs = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true +show_error_codes = true +warn_unused_ignores = false +disallow_incomplete_defs = true +disallow_untyped_decorators = true + +[tool.bandit] +targets = ["agent_framework_hyperlight"] +exclude_dirs = ["tests", "samples"] + +[tool.poe] +executor.type = "uv" +include = "../../shared_tasks.toml" + +[tool.poe.tasks.mypy] +help = "Run MyPy for this package." +cmd = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_hyperlight" + +[tool.poe.tasks.test] +help = "Run the default unit test suite for this package." 
+cmd = 'pytest -m "not integration" --cov=agent_framework_hyperlight --cov-report=term-missing:skip-covered tests' + +[build-system] +requires = ["flit-core >= 3.11,<4.0"] +build-backend = "flit_core.buildapi" diff --git a/python/packages/hyperlight/samples/README.md b/python/packages/hyperlight/samples/README.md new file mode 100644 index 0000000000..aa6aeeee1c --- /dev/null +++ b/python/packages/hyperlight/samples/README.md @@ -0,0 +1,43 @@ +# Hyperlight CodeAct samples + +These samples demonstrate the alpha `agent-framework-hyperlight` package. + +## When to use which pattern + +- **Provider pattern** (`codeact_context_provider.py`): Use when the tool + registry, file mounts, or network allow-list may change between runs, or when + you want the provider to manage CodeAct instructions and approval computation + automatically on every invocation. This is the recommended default for + production agents that need dynamic capability management or concurrent runs + sharing one provider. + +- **Manual static wiring** (`codeact_manual_wiring.py`): Use when the sandbox + tool set and capabilities are fixed for the agent's lifetime. This pattern + builds instructions once, passes `execute_code` alongside direct tools in + `tools=`, and skips the per-run provider lifecycle entirely. Simpler setup, + but changes to the tool registry after construction will not update the + agent's instructions automatically. + +- **Standalone tool** (`codeact_tool.py`): Use for the simplest integration + where `execute_code` is added directly to the agent tool list. The tool's own + description advertises `call_tool(...)` and the registered sandbox tools, so + no extra agent instructions are needed. Best for quick prototyping or when + CodeAct is just another tool alongside the agent's direct tools. + +## Samples + +- `codeact_context_provider.py` shows the provider-owned CodeAct model where the + agent only sees `execute_code` and sandbox tools are owned by + `HyperlightCodeActProvider`. 
+- `codeact_manual_wiring.py` shows static wiring where `HyperlightExecuteCodeTool` + and its instructions are passed directly to the `Agent` constructor. +- `codeact_tool.py` shows the standalone `HyperlightExecuteCodeTool` surface + where `execute_code` is added directly to the agent tool list. + +Run the samples from the repository after installing the workspace dependencies: + +```bash +uv run --directory packages/hyperlight python samples/codeact_context_provider.py +uv run --directory packages/hyperlight python samples/codeact_manual_wiring.py +uv run --directory packages/hyperlight python samples/codeact_tool.py +``` diff --git a/python/packages/hyperlight/samples/codeact_benchmark.py b/python/packages/hyperlight/samples/codeact_benchmark.py new file mode 100644 index 0000000000..275187d3b8 --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_benchmark.py @@ -0,0 +1,253 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Benchmark CodeAct vs. traditional tool-calling for a multi-tool-call task. + +This sample runs the same prompt against the same FoundryChatClient twice: + +1. **Traditional tool-calling**: the five business tools are passed directly to + the agent, so the model calls each tool individually via the LLM tool-call + interface. +2. **CodeAct**: the same tools are registered on a HyperlightCodeActProvider + and the model sees a single ``execute_code`` tool that calls them from + inside the Hyperlight sandbox via ``call_tool(...)``. + +The task (computing grand totals per user) naturally requires many tool calls +to complete. At the end, the sample prints elapsed time and token usage for +each run so the two approaches can be compared. 
+ +Run with: + cd python + uv run --directory packages/hyperlight python samples/codeact_benchmark.py + +Required environment variables (loaded from ``.env`` if present): + FOUNDRY_PROJECT_ENDPOINT + FOUNDRY_MODEL +""" + +from __future__ import annotations + +import asyncio +import os +import time +from typing import Annotated, Any, Literal + +from agent_framework import Agent, AgentResponse, UsageDetails +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv +from pydantic import BaseModel, Field + +from agent_framework_hyperlight import HyperlightCodeActProvider + +load_dotenv() + + +# 1. Deterministic "business" data and tools. + +_USERS: list[dict[str, Any]] = [ + {"id": 1, "name": "Alice", "region": "EU", "tier": "gold"}, + {"id": 2, "name": "Bob", "region": "US", "tier": "silver"}, + {"id": 3, "name": "Charlie", "region": "US", "tier": "gold"}, + {"id": 4, "name": "Diana", "region": "APAC", "tier": "bronze"}, + {"id": 5, "name": "Evan", "region": "EU", "tier": "silver"}, + {"id": 6, "name": "Fiona", "region": "US", "tier": "gold"}, + {"id": 7, "name": "George", "region": "APAC", "tier": "gold"}, + {"id": 8, "name": "Hana", "region": "EU", "tier": "bronze"}, +] + +_ORDERS: dict[int, list[dict[str, Any]]] = { + 1: [{"product": "Widget", "qty": 3, "unit_price": 9.99}, {"product": "Gadget", "qty": 1, "unit_price": 19.99}], + 2: [{"product": "Widget", "qty": 1, "unit_price": 9.99}], + 3: [{"product": "Gadget", "qty": 2, "unit_price": 19.99}, {"product": "Thingamajig", "qty": 4, "unit_price": 4.50}], + 4: [{"product": "Widget", "qty": 10, "unit_price": 9.99}], + 5: [{"product": "Gadget", "qty": 1, "unit_price": 19.99}], + 6: [{"product": "Widget", "qty": 2, "unit_price": 9.99}, {"product": "Thingamajig", "qty": 5, "unit_price": 4.50}], + 7: [{"product": "Gadget", "qty": 3, "unit_price": 19.99}], + 8: [{"product": "Thingamajig", "qty": 2, "unit_price": 4.50}], +} + +_DISCOUNTS: dict[str, 
float] = {"gold": 0.20, "silver": 0.10, "bronze": 0.05} +_TAX_RATES: dict[str, float] = {"EU": 0.21, "US": 0.08, "APAC": 0.10} + + +def list_users() -> list[dict[str, Any]]: + """Return all users as a list of dictionaries. + + Each entry has keys: id (int), name (str), region (str), tier (str). + """ + return _USERS + + +def get_orders_for_user( + user_id: Annotated[int, "The user id whose orders to retrieve."], +) -> list[dict[str, Any]]: + """Return the user's orders as a list of dictionaries. + + Each entry has keys: product (str), qty (int), unit_price (float). + """ + return _ORDERS.get(user_id, []) + + +def get_discount_rate( + tier: Annotated[Literal["gold", "silver", "bronze"], "The customer tier."], +) -> float: + """Return the discount rate as a float fraction (e.g. 0.2 for 20%).""" + return _DISCOUNTS[tier] + + +def get_tax_rate( + region: Annotated[Literal["EU", "US", "APAC"], "The region code."], +) -> float: + """Return the tax rate as a float fraction (e.g. 0.21 for 21%).""" + return _TAX_RATES[region] + + +def compute_line_total( + qty: Annotated[int, "Line item quantity."], + unit_price: Annotated[float, "Line item unit price."], + discount_rate: Annotated[float, "Discount rate as a fraction (e.g. 0.2 for 20%)."], + tax_rate: Annotated[float, "Tax rate as a fraction (e.g. 0.21 for 21%)."], +) -> float: + """Compute a single order line total. + + Formula: qty * unit_price * (1 - discount_rate) * (1 + tax_rate), rounded to 2 decimals. + """ + subtotal = qty * unit_price + discounted = subtotal * (1.0 - discount_rate) + return round(discounted * (1.0 + tax_rate), 2) + + +TOOLS = [list_users, get_orders_for_user, get_discount_rate, get_tax_rate, compute_line_total] + + +# 2. Structured output schema shared between both runs. 
+ + +class UserTotal(BaseModel): + """A user's grand total of all their orders.""" + + user_id: int = Field(description="The user's id.") + name: str = Field(description="The user's display name.") + grand_total: float = Field(description="Sum of all line totals, rounded to 2 decimals.") + + +class UserGrandTotals(BaseModel): + """Structured output schema for both runs.""" + + results: list[UserTotal] = Field(description="One entry per user, sorted by grand_total descending.") + + +INSTRUCTIONS = "You are a careful assistant. Use the provided tools for every lookup and computation." + +BENCHMARK_PROMPT = ( + "For every user in our system (there are 8 of them), compute the grand total of all their orders. " + "Use the compute_line_total tool for each user's orders, after looking up the relevant discount and " + "tax rates for that user. " + "Use the provided tools for EVERY data lookup (users, orders, discount rates, tax rates) and for EVERY " + "line-total computation via compute_line_total — do not invent values or hardcode any numbers. " + "The total per order item should apply the discount first and then the tax " + "(e.g. total = qty * unit_price * (1-discount) * (1+tax)). " + "Return one entry per user, sorted by grand_total descending." +) + + +def get_client() -> FoundryChatClient: + """Create a FoundryChatClient from environment variables.""" + return FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ) + + +# 3. Two runners that share the same tools, prompt, and structured output schema. 
+ + +async def _run_traditional() -> tuple[float, AgentResponse]: + agent = Agent( + client=get_client(), + name="TraditionalAgent", + instructions=INSTRUCTIONS, + tools=TOOLS, + default_options={"response_format": UserGrandTotals}, + ) + start = time.perf_counter() + result = await agent.run(BENCHMARK_PROMPT) + elapsed = time.perf_counter() - start + return elapsed, result + + +async def _run_codeact() -> tuple[float, AgentResponse]: + codeact = HyperlightCodeActProvider( + tools=TOOLS, + approval_mode="never_require", + ) + agent = Agent( + client=get_client(), + name="CodeActAgent", + instructions=INSTRUCTIONS, + context_providers=[codeact], + default_options={"response_format": UserGrandTotals}, + ) + start = time.perf_counter() + result = await agent.run(BENCHMARK_PROMPT) + elapsed = time.perf_counter() - start + return elapsed, result + + +# 4. Report results side by side. + + +def _print_section(title: str) -> None: + bar = "=" * 70 + print(f"\n{bar}\n{title}\n{bar}") + + +def _format_usage(usage: UsageDetails | None) -> str: + if usage is None: + return "usage=" + return ( + f"input={usage.get('input_token_count') or 0:>6} " + f"output={usage.get('output_token_count') or 0:>6} " + f"total={usage.get('total_token_count') or 0:>6}" + ) + + +def _print_results(result: AgentResponse) -> None: + if result.value is not None: + for row in result.value.results: + print(f" user_id={row.user_id:>2} name={row.name:<8} grand_total={row.grand_total:>8.2f}") + else: + print(result.text) + + +async def main() -> None: + """Run the benchmark and print a comparison.""" + trad_time, trad_result = await _run_traditional() + code_time, code_result = await _run_codeact() + + _print_section("Traditional tool-calling") + print(f"time={trad_time:7.2f}s {_format_usage(trad_result.usage_details)}") + _print_results(trad_result) + + _print_section("CodeAct (HyperlightCodeActProvider)") + print(f"time={code_time:7.2f}s {_format_usage(code_result.usage_details)}") + 
_print_results(code_result) + + _print_section("Comparison") + trad_total = (trad_result.usage_details or {}).get("total_token_count") or 0 + code_total = (code_result.usage_details or {}).get("total_token_count") or 0 + + def pct(new: float, old: float) -> str: + if old == 0: + return "n/a" + delta = (new - old) / old * 100 + sign = "+" if delta >= 0 else "" + return f"{sign}{delta:.1f}%" + + print(f"time : traditional={trad_time:7.2f}s codeact={code_time:7.2f}s delta={pct(code_time, trad_time)}") + print(f"tokens : traditional={trad_total:7d} codeact={code_total:7d} delta={pct(code_total, trad_total)}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/samples/codeact_context_provider.py b/python/packages/hyperlight/samples/codeact_context_provider.py new file mode 100644 index 0000000000..81b55034e5 --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_context_provider.py @@ -0,0 +1,188 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import logging +import os +from collections.abc import Awaitable, Callable +from typing import Annotated, Any, Literal + +from agent_framework import Agent, FunctionInvocationContext, function_middleware, tool +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv + +from agent_framework_hyperlight import HyperlightCodeActProvider + +"""This sample demonstrates the provider-owned Hyperlight CodeAct flow. + +The sample keeps `compute` and `fetch_data` off the direct agent tool surface and +registers them only with `HyperlightCodeActProvider`. The model therefore sees a +single `execute_code` tool and must call the provider-owned tools from inside +the sandbox with `call_tool(...)`. 
+""" + +load_dotenv() + +_CYAN = "\033[36m" +_YELLOW = "\033[33m" +_GREEN = "\033[32m" +_DIM = "\033[2m" +_RESET = "\033[0m" + + +class _ColoredFormatter(logging.Formatter): + """Dim logger output so it does not compete with sample prints.""" + + def format(self, record: logging.LogRecord) -> str: + return f"{_DIM}{super().format(record)}{_RESET}" + + +logging.basicConfig(level=logging.WARNING) +logging.getLogger().handlers[0].setFormatter( + _ColoredFormatter("[%(asctime)s] %(levelname)s: %(message)s"), +) + + +@function_middleware +async def log_function_calls( + context: FunctionInvocationContext, + call_next: Callable[[], Awaitable[None]], +) -> None: + """Log tool calls, including readable execute_code blocks.""" + import time + + function_name = context.function.name + arguments = context.arguments if isinstance(context.arguments, dict) else {} + + if function_name == "execute_code" and "code" in arguments: + print(f"\n{_YELLOW}{'─' * 60}") + print("▶ execute_code") + print(f"{'─' * 60}{_RESET}") + print(arguments["code"]) + print(f"{_YELLOW}{'─' * 60}{_RESET}") + else: + pairs = ", ".join(f"{name}={value!r}" for name, value in arguments.items()) + print(f"\n{_YELLOW}▶ {function_name}({pairs}){_RESET}") + + start = time.perf_counter() + await call_next() + elapsed = time.perf_counter() - start + + result = context.result + if function_name == "execute_code" and isinstance(result, list): + for output in result: + if output.type == "text" and output.text: + print(f"{_GREEN}stdout:\n{output.text}{_RESET}") + elif output.type == "error" and output.error_details: + print(f"{_YELLOW}stderr:\n{output.error_details}{_RESET}") + else: + print(f"{_YELLOW}◀ {function_name} → {result!r}{_RESET}") + + print(f"{_DIM} ({elapsed:.4f}s){_RESET}") + + +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + "Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric 
operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation for sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] + + +@tool(approval_mode="never_require") +async def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch records from a named table.""" + await asyncio.sleep(0.5) + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, + ], + } + return data.get(table, []) + + +async def main() -> None: + """Run the provider-owned Hyperlight CodeAct sample.""" + # 1. Create the Hyperlight-backed provider and register sandbox tools on it. + codeact = HyperlightCodeActProvider( + tools=[compute, fetch_data], + approval_mode="never_require", + ) + + # 2. Create the client and the agent. + agent = Agent( + client=FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ), + name="HyperlightCodeActProviderAgent", + instructions="You are a helpful assistant.", + context_providers=[codeact], + middleware=[log_function_calls], + ) + + # 3. Run a request that should use execute_code plus provider-owned tools. + query = ( + "Fetch all users, find admins, multiply 7*(3*2), and print the users, " + "admins, and multiplication result. Use execute_code and call_tool(...) " + "inside the sandbox." 
+ ) + print(f"{_CYAN}{'=' * 60}") + print("Hyperlight CodeAct provider sample") + print(f"{'=' * 60}{_RESET}") + print(f"{_CYAN}User: {query}{_RESET}") + result = await agent.run(query) + print(f"{_CYAN}Agent: {result.text}{_RESET}") + + +""" +Sample output (shape only): + +============================================================ +Hyperlight CodeAct provider sample +============================================================ +User: Fetch all users, find admins, multiply 7*(3*2), ... + +──────────────────────────────────────────────────────────── +▶ execute_code +──────────────────────────────────────────────────────────── +users = call_tool("fetch_data", table="users") +admins = [user for user in users if user["role"] == "admin"] +result = call_tool("compute", operation="multiply", a=7, b=6) +print("Users:", users) +print("Admins:", admins) +print("7 * 6 =", result) +──────────────────────────────────────────────────────────── +stdout: +Users: [...] +Admins: [...] +7 * 6 = 42.0 + (0.0xxx s) +Agent: ... +""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/samples/codeact_manual_wiring.py b/python/packages/hyperlight/samples/codeact_manual_wiring.py new file mode 100644 index 0000000000..c7a4761efb --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_manual_wiring.py @@ -0,0 +1,133 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import os +from typing import Annotated, Any, Literal + +from agent_framework import Agent, tool +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv + +from agent_framework_hyperlight import HyperlightExecuteCodeTool + +"""This sample demonstrates manual static wiring of CodeAct without a provider. 
+ +Instead of using `HyperlightCodeActProvider` with `context_providers=`, this +sample creates a `HyperlightExecuteCodeTool` directly, extracts its CodeAct +instructions once, and passes both to the `Agent` constructor at build time. + +This avoids the per-run provider lifecycle (`before_run` / `after_run`) and is +well-suited when the tool registry, file mounts, and network allow-list are +fixed for the agent's lifetime. The tradeoff is that dynamic tool or capability +changes between runs are not supported — any mutations to the tool would not +update the agent's instructions automatically. +""" + +load_dotenv() + + +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + "Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation used by sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] + + +@tool(approval_mode="never_require") +def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch simulated records from a named table.""" + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, + ], + } + return data.get(table, []) + + +@tool(approval_mode="never_require") +def send_email( + to: Annotated[str, "Recipient email address."], + subject: Annotated[str, "Email subject line."], + body: Annotated[str, "Email body text."], +) -> str: + """Simulate sending an email (direct-only tool, not available inside the sandbox).""" + 
return f"Email sent to {to}: {subject}" + + +async def main() -> None: + """Run the manual static-wiring sample.""" + # 1. Create the execute_code tool and register sandbox tools on it. + execute_code = HyperlightExecuteCodeTool( + tools=[compute, fetch_data], + approval_mode="never_require", + ) + + # 2. Build CodeAct instructions once. Setting tools_visible_to_model=False + # tells the instructions builder that sandbox tools are not in the agent's + # direct tool list, so the model must use call_tool(...) inside execute_code. + codeact_instructions = execute_code.build_instructions(tools_visible_to_model=False) + + # 3. Create the client and the agent with everything wired at construction time. + # - send_email is a direct-only tool (not available inside the sandbox). + # - execute_code carries sandbox tools (compute, fetch_data) via call_tool. + agent = Agent( + client=FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ), + name="ManualWiringAgent", + instructions=f"You are a helpful assistant.\n\n{codeact_instructions}", + tools=[send_email, execute_code], + ) + + # 4. Run a request that exercises both the sandbox and the direct tool. + print("=" * 60) + print("Manual static-wiring CodeAct sample") + print("=" * 60) + query = ( + "Fetch all users, find admins, multiply 6*7, and print the users, admins, " + "and multiplication result. Use one execute_code call. " + "Then send an email to admin@example.com summarising the results." + ) + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +""" +Sample output (shape only): + +============================================================ +Manual static-wiring CodeAct sample +============================================================ +User: Fetch all users, find admins, multiply 6*7, ... +Agent: ... 
+""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/samples/codeact_tool.py b/python/packages/hyperlight/samples/codeact_tool.py new file mode 100644 index 0000000000..64c0e6fde5 --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_tool.py @@ -0,0 +1,110 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import os +from typing import Annotated, Any, Literal + +from agent_framework import Agent, tool +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv + +from agent_framework_hyperlight import HyperlightExecuteCodeTool + +"""This sample demonstrates the standalone Hyperlight execute_code tool. + +The sample adds `HyperlightExecuteCodeTool` directly to the agent. The tool's +own description advertises `call_tool(...)`, the registered sandbox tools, and +the current capability configuration, so no extra CodeAct-specific agent +instructions are required. 
+""" + +load_dotenv() + + +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + "Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation used by sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] + + +@tool(approval_mode="never_require") +def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch simulated records from a named table.""" + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, + ], + } + return data.get(table, []) + + +async def main() -> None: + """Run the standalone execute_code sample.""" + # 1. Create the packaged execute_code tool and register sandbox tools on it. + execute_code = HyperlightExecuteCodeTool( + tools=[compute, fetch_data], + approval_mode="never_require", + ) + + # 2. Create the client and the agent. + agent = Agent( + client=FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ), + name="HyperlightExecuteCodeToolAgent", + instructions="You are a helpful assistant.", + tools=execute_code, + ) + + # 3. Run one request through the direct-tool surface. + print("=" * 60) + print("Hyperlight execute_code tool sample") + print("=" * 60) + query = ( + "Fetch all users, find admins, multiply 6*7, and print the users, admins, " + "and multiplication result. Use one execute_code call." 
+ ) + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +""" +Sample output (shape only): + +============================================================ +Hyperlight execute_code tool sample +============================================================ +User: Fetch all users, find admins, multiply 6*7, ... +Agent: ... +""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py b/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py new file mode 100644 index 0000000000..ab6a3f7c78 --- /dev/null +++ b/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py @@ -0,0 +1,939 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import importlib.metadata +import importlib.util +import inspect +import json +import sys +import threading +import time +from collections.abc import Awaitable, Callable, Mapping, MutableSequence +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import pytest +from agent_framework import ( + Agent, + BaseChatClient, + ChatResponse, + ChatResponseUpdate, + Content, + FunctionInvocationLayer, + FunctionTool, + Message, + ResponseStream, + tool, +) + +from agent_framework_hyperlight import AllowedDomain, FileMount, HyperlightCodeActProvider, HyperlightExecuteCodeTool +from agent_framework_hyperlight import _execute_code_tool as execute_code_module + + +def _hyperlight_integration_static_skip_reason() -> str | None: + if sys.version_info >= (3, 14): + return ( + "Hyperlight integration tests require Python < 3.14 because hyperlight-sandbox-backend-wasm is unsupported." + ) + + if sys.platform not in {"linux", "win32"}: + return "Hyperlight integration tests require Linux or Windows runners." + + if importlib.util.find_spec("hyperlight_sandbox") is None: + return "hyperlight-sandbox is not installed." 
+ + if importlib.util.find_spec("python_guest") is None: + return "hyperlight-sandbox-python-guest is not installed." + + try: + importlib.metadata.version("hyperlight-sandbox-backend-wasm") + except importlib.metadata.PackageNotFoundError: + return "hyperlight-sandbox-backend-wasm is not installed." + + return None + + +def _hyperlight_integration_runtime_skip_reason() -> str | None: + if (reason := _hyperlight_integration_static_skip_reason()) is not None: + return reason + + try: + sandbox_cls = execute_code_module._load_sandbox_class() + sandbox = sandbox_cls( + backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND, + module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE, + ) + sandbox.run("None") + except RuntimeError as exc: + message = str(exc) + if "no hypervisor was found for sandbox" in message.lower(): + return "Hyperlight integration tests require a runner with a working Hyperlight hypervisor." + + return None + + +def _skip_if_hyperlight_integration_runtime_disabled() -> None: + if (reason := _hyperlight_integration_runtime_skip_reason()) is not None: + pytest.skip(reason) + + +skip_if_hyperlight_integration_tests_disabled = pytest.mark.skipif( + (reason := _hyperlight_integration_static_skip_reason()) is not None, + reason=reason or "Hyperlight integration tests are disabled.", +) + + +@pytest.fixture(scope="module") +def shared_sandbox(): + """Long-lived sandbox with snapshot/restore for read-mostly tests. + + Multiple tests run sequentially against this fixture. Each test restores the + sandbox to a clean state via the ``restored_sandbox`` fixture. 
+ """ + if (reason := _hyperlight_integration_runtime_skip_reason()) is not None: + pytest.skip(reason) + + sandbox_cls = execute_code_module._load_sandbox_class() + sandbox = sandbox_cls( + backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND, + module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE, + ) + sandbox.run("None") + snapshot = sandbox.snapshot() + yield sandbox, snapshot + + +@pytest.fixture +def restored_sandbox(shared_sandbox): + """Restore shared sandbox to clean state before each test.""" + sandbox, snapshot = shared_sandbox + sandbox.restore(snapshot) + return sandbox + + +@pytest.fixture +def fresh_sandbox(): + """Short-lived sandbox for tests that alter config meaningfully. + + Not pre-warmed: call ``sandbox.run("None")`` after registering tools + and domains, then snapshot/restore before executing test code. + """ + if (reason := _hyperlight_integration_runtime_skip_reason()) is not None: + pytest.skip(reason) + + sandbox_cls = execute_code_module._load_sandbox_class() + sandbox = sandbox_cls( + backend=execute_code_module.DEFAULT_HYPERLIGHT_BACKEND, + module=execute_code_module.DEFAULT_HYPERLIGHT_MODULE, + temp_output=True, + ) + yield sandbox + + +@tool(approval_mode="never_require") +def compute(a: int, b: int) -> int: + return a + b + + +@tool(approval_mode="always_require") +def dangerous_compute(a: int, b: int) -> int: + return a * b + + +@tool(name="compute", approval_mode="always_require") +def replacement_compute(a: int, b: int) -> int: + return a - b + + +@dataclass(slots=True) +class _FakeResult: + success: bool + stdout: str = "" + stderr: str = "" + + +def _run_in_thread(callback: Callable[[], Any]) -> Any: + result: dict[str, Any] = {} + error: dict[str, BaseException] = {} + + def _runner() -> None: + try: + result["value"] = callback() + except BaseException as exc: + error["value"] = exc + + thread = threading.Thread(target=_runner) + thread.start() + thread.join() + + if "value" in error: + raise error["value"] + + return 
result.get("value") + + +class _FakeSandbox: + instances: list[_FakeSandbox] = [] + + def __init__( + self, + *, + input_dir: str | None = None, + output_dir: str | None = None, + temp_output: bool = False, + backend: str = "wasm", + module: str | None = None, + module_path: str | None = None, + heap_size: str | None = None, + stack_size: str | None = None, + ) -> None: + self.input_dir = input_dir + self.output_dir = output_dir + self.registered_tools: dict[str, Any] = {} + self.allowed_domains: list[tuple[str, list[str] | None]] = [] + self.restore_calls: list[Any] = [] + self.output_files: list[str] = [] + _FakeSandbox.instances.append(self) + + def register_tool(self, name_or_tool: Any, callback: Any | None = None) -> None: + if callback is None: + raise AssertionError("Expected callback registration for sandbox tools.") + self.registered_tools[str(name_or_tool)] = callback + + def allow_domain(self, target: str, methods: list[str] | None = None) -> None: + self.allowed_domains.append((target, methods)) + + def _invoke_tool(self, name: str, **kwargs: Any) -> Any: + callback = self.registered_tools[name] + if inspect.iscoroutinefunction(callback): + return _run_in_thread(lambda: asyncio.run(callback(**kwargs))) + + result = callback(**kwargs) + if inspect.isawaitable(result): + return _run_in_thread(lambda: asyncio.run(result)) + return result + + def run(self, code: str) -> _FakeResult: + if code == "None": + return _FakeResult(success=True) + if code == "create-output": + if self.output_dir is None: + raise AssertionError("Expected output directory for create-output test.") + Path(self.output_dir, "report.txt").write_text("artifact", encoding="utf-8") + self.output_files = ["report.txt"] + return _FakeResult(success=True, stdout="done\n") + if 'call_tool("compute", a=20, b=22)' in code: + total = self._invoke_tool("compute", a=20, b=22) + return _FakeResult(success=True, stdout=f"{total}\n") + return _FakeResult(success=False, stderr="sandbox boom") + + def 
snapshot(self) -> str: + return "snapshot" + + def restore(self, snapshot: Any) -> None: + self.restore_calls.append(snapshot) + + def get_output_files(self) -> list[str]: + return list(self.output_files) + + +class _FakeRuntime: + def __init__(self) -> None: + self.calls: list[tuple[Any, str]] = [] + + def execute(self, *, config: Any, code: str) -> list[Content]: + self.calls.append((config, code)) + return [Content.from_text("ok")] + + +class _FakeSandboxWithoutOutputListing(_FakeSandbox): + def get_output_files(self) -> list[str]: + return [] + + +class _FakeSandboxWithDelayedUnlistedOutput(_FakeSandboxWithoutOutputListing): + writer_threads: list[threading.Thread] = [] + + def run(self, code: str) -> _FakeResult: + if 'Path("/output/report.txt").write_text("artifact", encoding="utf-8")' in code: + if self.output_dir is None: + raise AssertionError("Expected output directory for delayed output test.") + + def _write_file() -> None: + time.sleep(0.15) + Path(self.output_dir, "report.txt").write_text("artifact", encoding="utf-8") + + writer_thread = threading.Thread(target=_write_file) + writer_thread.start() + self.writer_threads.append(writer_thread) + return _FakeResult(success=True) + + return super().run(code) + + +class _FakeSessionContext: + def __init__(self, *, tools: list[Any] | None = None) -> None: + self.options: dict[str, Any] = {} + if tools is not None: + self.options["tools"] = tools + self.instructions: list[tuple[str, str]] = [] + self.tools: list[tuple[str, list[Any]]] = [] + + def extend_instructions(self, source_id: str, instructions: str) -> None: + self.instructions.append((source_id, instructions)) + + def extend_tools(self, source_id: str, tools: list[Any]) -> None: + self.tools.append((source_id, tools)) + + +def _extract_text_output(function_result: Content) -> str: + assert function_result.type == "function_result" + assert function_result.exception is None, ( + f"execute_code raised {function_result.exception!r} with 
items={function_result.items!r}" + ) + text_output = next( + (item for item in function_result.items or [] if item.type == "text" and item.text is not None), + None, + ) + if text_output is not None and text_output.text is not None: + return text_output.text + if function_result.result: + return function_result.result + raise AssertionError(f"Expected text output from execute_code, got {function_result.items!r}") + + +class _FakeCodeActChatClient(FunctionInvocationLayer[Any], BaseChatClient[Any]): + def __init__(self) -> None: + FunctionInvocationLayer.__init__(self) + BaseChatClient.__init__(self) + self.call_count = 0 + + def _inner_get_response( + self, + *, + messages: MutableSequence[Message], + stream: bool, + options: Mapping[str, Any], + **kwargs: Any, + ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]: + if stream: + raise AssertionError("Streaming is not used in this integration test.") + + async def _get_response() -> ChatResponse: + self.call_count += 1 + + if self.call_count == 1: + return ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="execute_code_call", + name="execute_code", + arguments={ + "code": 'total = call_tool("compute", a=20, b=22)\nprint(total)', + }, + ) + ], + ) + ) + + function_results = [ + content for message in messages for content in message.contents if content.type == "function_result" + ] + assert len(function_results) == 1 + + result_content = function_results[0] + assert result_content.call_id == "execute_code_call" + assert _extract_text_output(result_content) == "42\n" + + return ChatResponse(messages=Message(role="assistant", contents=["The sandbox returned 42."])) + + return _get_response() + + +def test_execute_code_tool_updates_approval_with_managed_tools() -> None: + execute_code = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime()) + assert execute_code.approval_mode == "never_require" + + 
execute_code.add_tools([dangerous_compute]) + assert execute_code.approval_mode == "always_require" + + +def test_execute_code_tool_replaces_tools_with_the_same_name() -> None: + execute_code = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime()) + + execute_code.add_tools(replacement_compute) + + tools = execute_code.get_tools() + assert len(tools) == 1 + assert tools[0] is replacement_compute + assert execute_code.approval_mode == "always_require" + + +def test_execute_code_tool_accepts_string_and_tuple_file_mounts_without_mode_flags( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + shorthand_file = tmp_path / "notes.txt" + shorthand_file.write_text("hello", encoding="utf-8") + explicit_file = tmp_path / "data.json" + explicit_file.write_text('{"hello": "world"}', encoding="utf-8") + monkeypatch.chdir(tmp_path) + + execute_code = HyperlightExecuteCodeTool(_registry=_FakeRuntime()) + execute_code.add_file_mounts("notes.txt") + execute_code.add_file_mounts((explicit_file, "data/data.json")) + + assert execute_code.get_file_mounts() == [ + FileMount(shorthand_file.resolve(), "/input/notes.txt"), + FileMount(explicit_file.resolve(), "/input/data/data.json"), + ] + + +async def test_execute_code_tool_populates_input_dir_with_workspace_and_file_mounts( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + workspace_root = tmp_path / "workspace" + workspace_root.mkdir() + (workspace_root / "notes.txt").write_text("workspace note", encoding="utf-8") + + mounted_file = tmp_path / "mounted.txt" + mounted_file.write_text("hello from mount", encoding="utf-8") + + execute_code = HyperlightExecuteCodeTool( + workspace_root=workspace_root, + file_mounts=[FileMount(mounted_file, "data/input.txt")], + ) + result = await execute_code.invoke(arguments={"code": "None"}) + + assert result[0].type == "text" + assert 
_FakeSandbox.instances[0].input_dir is not None + + input_root = Path(_FakeSandbox.instances[0].input_dir) + assert (input_root / "notes.txt").read_text(encoding="utf-8") == "workspace note" + assert (input_root / "data" / "input.txt").read_text(encoding="utf-8") == "hello from mount" + + +def test_execute_code_tool_allowed_domains_use_structured_entries_and_replace_by_target() -> None: + execute_code = HyperlightExecuteCodeTool(_registry=_FakeRuntime()) + + execute_code.add_allowed_domains(["https://api.example.com/v1", ("github.com", "get")]) + execute_code.add_allowed_domains([ + AllowedDomain("api.example.com", ("post", "get")), + ("github.com", ["head", "get"]), + ]) + + assert execute_code.get_allowed_domains() == [ + AllowedDomain("api.example.com", ("GET", "POST")), + AllowedDomain("github.com", ("GET", "HEAD")), + ] + + +def test_execute_code_tool_description_contains_call_tool_guidance(tmp_path: Path) -> None: + workspace_root = tmp_path / "workspace" + workspace_root.mkdir() + (workspace_root / "notes.txt").write_text("hello", encoding="utf-8") + mount_file = tmp_path / "data.json" + mount_file.write_text('{"hello": "world"}', encoding="utf-8") + + execute_code = HyperlightExecuteCodeTool( + tools=[compute], + workspace_root=workspace_root, + file_mounts=[FileMount(str(mount_file), "data/data.json")], + allowed_domains=[AllowedDomain("https://api.example.com/v1", ("get", "post")), "github.com"], + _registry=_FakeRuntime(), + ) + + description = execute_code.description + + assert "call_tool(name, **kwargs)" in description + assert "compute" in description + assert "/input/data/data.json" in description + assert "/output" in description + assert "api.example.com" in description + assert "GET, POST" in description + assert "github.com" in description + + +async def test_execute_code_tool_executes_with_structured_content(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, 
"_load_sandbox_class", lambda: _FakeSandbox) + + execute_code = HyperlightExecuteCodeTool( + tools=[compute], + file_mounts=[FileMount(Path(__file__), "fixtures/source.py")], + allowed_domains=[("api.example.com", "get")], + ) + + result = await execute_code.invoke(arguments={"code": "create-output"}) + + assert result[0].type == "text" + assert result[0].text == "done\n" + assert any(item.type == "data" for item in result) + assert _FakeSandbox.instances[0].allowed_domains == [("api.example.com", ["GET"])] + assert "compute" in _FakeSandbox.instances[0].registered_tools + + +async def test_execute_code_tool_collects_output_files_without_backend_listing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandboxWithoutOutputListing) + + execute_code = HyperlightExecuteCodeTool( + file_mounts=[FileMount(Path(__file__), "fixtures/source.py")], + ) + result = await execute_code.invoke(arguments={"code": "create-output"}) + + assert result[0].type == "text" + assert any(item.type == "data" and item.additional_properties["path"] == "/output/report.txt" for item in result) + + +async def test_execute_code_tool_waits_for_unlisted_output_files_to_appear( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _FakeSandboxWithDelayedUnlistedOutput.writer_threads.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandboxWithDelayedUnlistedOutput) + + execute_code = HyperlightExecuteCodeTool( + file_mounts=[FileMount(Path(__file__), "fixtures/source.py")], + ) + result = await execute_code.invoke( + arguments={"code": 'Path("/output/report.txt").write_text("artifact", encoding="utf-8")'} + ) + + for writer_thread in _FakeSandboxWithDelayedUnlistedOutput.writer_threads: + writer_thread.join() + + assert any(item.type == "data" and item.additional_properties["path"] == "/output/report.txt" for item in result) + + +async def 
test_execute_code_tool_failure_returns_error_content(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + execute_code = HyperlightExecuteCodeTool() + result = await execute_code.invoke(arguments={"code": "fail"}) + + assert result[0].type == "error" + assert result[0].error_details == "sandbox boom" + + +async def test_execute_code_tool_retries_allowed_domains_with_urls_when_backend_rejects_host_targets( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _FakeStrictNetworkSandbox: + instances: list[_FakeStrictNetworkSandbox] = [] + + def __init__( + self, + *, + input_dir: str | None = None, + output_dir: str | None = None, + backend: str = "wasm", + module: str | None = None, + module_path: str | None = None, + ) -> None: + del input_dir, output_dir, backend, module, module_path + self.allowed_domains: list[tuple[str, list[str] | None]] = [] + _FakeStrictNetworkSandbox.instances.append(self) + + def register_tool(self, name_or_tool: Any, callback: Any | None = None) -> None: + del name_or_tool, callback + + def allow_domain(self, target: str, methods: list[str] | None = None) -> None: + self.allowed_domains.append((target, methods)) + + def run(self, code: str) -> _FakeResult: + if code == "None" and any("://" not in target for target, _ in self.allowed_domains): + raise RuntimeError("invalid URL for network permission: ") + return _FakeResult(success=True) + + def snapshot(self) -> str: + return "snapshot" + + def restore(self, snapshot: Any) -> None: + del snapshot + + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeStrictNetworkSandbox) + + execute_code = HyperlightExecuteCodeTool(allowed_domains=[("127.0.0.1:8080", "get")]) + result = await execute_code.invoke(arguments={"code": "None"}) + + assert result[0].type == "text" + assert len(_FakeStrictNetworkSandbox.instances) == 2 + assert 
_FakeStrictNetworkSandbox.instances[0].allowed_domains == [("127.0.0.1:8080", ["GET"])] + assert _FakeStrictNetworkSandbox.instances[1].allowed_domains == [ + ("http://127.0.0.1:8080", ["GET"]), + ("https://127.0.0.1:8080", ["GET"]), + ] + + +def test_hyperlight_integration_runtime_skip_reason_reports_missing_hypervisor(monkeypatch: pytest.MonkeyPatch) -> None: + class _FakeNoHypervisorSandbox: + def __init__( + self, + *, + input_dir: str | None = None, + output_dir: str | None = None, + backend: str = "wasm", + module: str | None = None, + module_path: str | None = None, + ) -> None: + del input_dir, output_dir, backend, module, module_path + + def run(self, code: str) -> _FakeResult: + del code + raise RuntimeError("failed to build ProtoWasmSandbox: No Hypervisor was found for Sandbox") + + original_find_spec = importlib.util.find_spec + + def _fake_find_spec(name: str) -> object | None: + if name in {"hyperlight_sandbox", "python_guest"}: + return object() + return original_find_spec(name) + + monkeypatch.setattr(sys, "version_info", (3, 13, 0)) + monkeypatch.setattr(sys, "platform", "linux") + monkeypatch.setattr(importlib.util, "find_spec", _fake_find_spec) + monkeypatch.setattr(importlib.metadata, "version", lambda _: "0.0.0") + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeNoHypervisorSandbox) + + assert _hyperlight_integration_runtime_skip_reason() == ( + "Hyperlight integration tests require a runner with a working Hyperlight hypervisor." 
+ ) + + +async def test_provider_injects_run_scoped_execute_code_tool() -> None: + runtime = _FakeRuntime() + provider = HyperlightCodeActProvider(tools=[compute], _registry=runtime) + context = _FakeSessionContext(tools=[dangerous_compute]) + state: dict[str, Any] = {} + + await provider.before_run(agent=object(), session=None, context=context, state=state) + + assert context.options["tools"] == [dangerous_compute] + assert len(context.instructions) == 1 + assert len(context.tools) == 1 + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + assert run_tool.approval_mode == "never_require" + assert [tool_obj.name for tool_obj in run_tool.get_tools()] == ["compute"] + assert "dangerous_compute" not in context.instructions[0][1] + assert "compute" not in context.instructions[0][1] + assert "Filesystem capabilities:" not in context.instructions[0][1] + assert state[provider.source_id]["tool_names"] == ["compute"] + assert state[provider.source_id]["approval_mode"] == "never_require" + json.dumps(state) + + provider.remove_tool("compute") + assert [tool_obj.name for tool_obj in run_tool.get_tools()] == ["compute"] + + +async def test_agent_runs_hyperlight_codeact_end_to_end_with_fake_sandbox(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + client = _FakeCodeActChatClient() + provider = HyperlightCodeActProvider(tools=[compute]) + agent = Agent(client=client, context_providers=[provider]) + + response = await agent.run("Use the sandbox to add 20 and 22.") + + assert response.text == "The sandbox returned 42." 
+ assert client.call_count == 2 + assert len(_FakeSandbox.instances) == 1 + assert "compute" in _FakeSandbox.instances[0].registered_tools + + +@skip_if_hyperlight_integration_tests_disabled +async def test_agent_runs_hyperlight_codeact_end_to_end_with_real_sandbox() -> None: + _skip_if_hyperlight_integration_runtime_disabled() + + client = _FakeCodeActChatClient() + provider = HyperlightCodeActProvider(tools=[compute]) + agent = Agent(client=client, context_providers=[provider]) + + response = await agent.run("Use the sandbox to add 20 and 22.") + + assert response.text == "The sandbox returned 42." + assert client.call_count == 2 + + +@skip_if_hyperlight_integration_tests_disabled +async def test_provider_run_tool_writes_files_with_real_sandbox(tmp_path: Path) -> None: + _skip_if_hyperlight_integration_runtime_disabled() + + workspace_root = tmp_path / "workspace" + workspace_root.mkdir() + provider = HyperlightCodeActProvider(workspace_root=workspace_root) + + context = _FakeSessionContext() + state: dict[str, Any] = {} + await provider.before_run(agent=object(), session=None, context=context, state=state) + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + + result = await run_tool.invoke( + arguments={ + "code": ( + 'payload = "hello from sandbox"\n' + "output_path = None\n" + 'for candidate in ("/output/result.txt",):\n' + " try:\n" + ' with open(candidate, "w", encoding="utf-8") as f:\n' + " f.write(payload)\n" + " except OSError:\n" + " continue\n" + " output_path = candidate\n" + " break\n" + 'assert output_path is not None, "output path unavailable"\n' + 'print("validated")\n' + ) + } + ) + + outputs = result + error_outputs = [ + f"{item.message}: {item.error_details}" + for item in outputs + if item.type == "error" and item.error_details is not None + ] + assert not error_outputs, error_outputs + + text_output = next((item for item in outputs if item.type == "text" and item.text is not None), None) + if 
text_output is not None: + assert text_output.text == "validated\n" + + file_output = next((item for item in outputs if item.type == "data"), None) + if file_output is not None: + assert file_output.uri is not None and file_output.uri.startswith("data:") + assert file_output.additional_properties["path"] in {"/output/result.txt", "/output/output/result.txt"} + + +@pytest.mark.integration +@skip_if_hyperlight_integration_tests_disabled +@pytest.mark.skipif(sys.platform == "win32", reason="Hyperlight WASM sandbox lacks encodings.idna on Windows") +async def test_provider_run_tool_pings_bing_with_real_sandbox() -> None: + _skip_if_hyperlight_integration_runtime_disabled() + + provider = HyperlightCodeActProvider() + provider.add_allowed_domains("bing.com") + + context = _FakeSessionContext() + state: dict[str, Any] = {} + await provider.before_run(agent=object(), session=None, context=context, state=state) + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + + result = await run_tool.invoke( + arguments={ + "code": ( + "import _socket\n\n" + 'addresses = _socket.getaddrinfo("bing.com", 80, _socket.AF_INET, _socket.SOCK_STREAM)\n' + 'assert addresses, "bing.com did not resolve"\n' + "last_error = None\n" + "for family, socktype, proto, _, sockaddr in addresses:\n" + " connection = None\n" + " try:\n" + " connection = _socket.socket(family, socktype, proto)\n" + " connection.settimeout(10)\n" + " connection.connect(sockaddr)\n" + ' print("pinged bing.com")\n' + " break\n" + " except OSError as exc:\n" + " last_error = exc\n" + " finally:\n" + " if connection is not None:\n" + " try:\n" + " connection.close()\n" + " except OSError:\n" + " pass\n" + "else:\n" + ' raise last_error or RuntimeError("unable to reach bing.com")\n' + ) + } + ) + + outputs = result + error_outputs = [ + f"{item.message}: {item.error_details}" + for item in outputs + if item.type == "error" and item.error_details is not None + ] + assert not 
error_outputs, error_outputs + + text_output = next((item for item in outputs if item.type == "text" and item.text is not None), None) + if text_output is not None: + assert text_output.text == "pinged bing.com\n" + + +# --------------------------------------------------------------------------- +# Real-sandbox tests using shared (long-lived) fixture +# --------------------------------------------------------------------------- + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_runs_simple_code(restored_sandbox) -> None: + result = restored_sandbox.run('print("hello")') + assert result.success + assert "hello" in result.stdout + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_stdout_and_stderr_captured(restored_sandbox) -> None: + result = restored_sandbox.run('import sys\nprint("out")\nprint("err", file=sys.stderr)') + assert result.success + assert "out" in result.stdout + assert "err" in result.stderr + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_code_failure_returns_nonzero_exit(restored_sandbox) -> None: + result = restored_sandbox.run("raise ValueError('boom')") + assert not result.success + assert "boom" in result.stderr + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_snapshot_restore_keeps_sandbox_functional(restored_sandbox) -> None: + """Verify snapshot/restore cycle leaves the sandbox in a working state.""" + # Mutate the sandbox + result1 = restored_sandbox.run('print("before snapshot")') + assert result1.success + + # Take a snapshot and restore + snapshot = restored_sandbox.snapshot() + restored_sandbox.restore(snapshot) + + # Sandbox still works after restore + result2 = restored_sandbox.run('print("after restore")') + assert result2.success + assert "after restore" in result2.stdout + + +# --------------------------------------------------------------------------- +# Real-sandbox tests using fresh (short-lived) fixture +# 
--------------------------------------------------------------------------- + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_with_tool_registration_and_execution(fresh_sandbox) -> None: + """Verify that a sync host tool round-trips via call_tool in the real sandbox.""" + + def multiply(a: int, b: int) -> int: + return a * b + + fresh_sandbox.register_tool("multiply", multiply) + fresh_sandbox.run("None") + snapshot = fresh_sandbox.snapshot() + fresh_sandbox.restore(snapshot) + result = fresh_sandbox.run('result = call_tool("multiply", a=6, b=7)\nprint(result)') + assert result.success + assert "42" in result.stdout + + +@skip_if_hyperlight_integration_tests_disabled +async def test_sandbox_async_callback_round_trips_with_real_sandbox(fresh_sandbox) -> None: + """Confirm that _make_sandbox_callback (sync wrapper) works with real FFI.""" + sandbox_tool = FunctionTool( + func=compute, + name="compute", + description="Add two numbers", + ) + callback = execute_code_module._make_sandbox_callback(sandbox_tool) + + fresh_sandbox.register_tool("compute", callback) + fresh_sandbox.run("None") + snapshot = fresh_sandbox.snapshot() + fresh_sandbox.restore(snapshot) + result = fresh_sandbox.run('total = call_tool("compute", a=20, b=22)\nprint(total)') + assert result.success + assert "42" in result.stdout + + +@skip_if_hyperlight_integration_tests_disabled +async def test_output_dir_cleared_between_invocations() -> None: + """Verify stale output files don't leak across invocations (comment 23).""" + _skip_if_hyperlight_integration_runtime_disabled() + + provider = HyperlightCodeActProvider(workspace_root=Path(__file__).parent) + context = _FakeSessionContext() + state: dict[str, Any] = {} + await provider.before_run(agent=object(), session=None, context=context, state=state) + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + + # First invocation: write a file + result1 = await run_tool.invoke( + 
arguments={"code": ('with open("/output/stale.txt", "w") as f:\n f.write("first")\nprint("wrote")\n')} + ) + assert result1[0].type == "text" or result1[0].type == "data" + outputs1 = result1 + assert any( + item.type == "data" and "stale.txt" in (item.additional_properties or {}).get("path", "") for item in outputs1 + ), "First invocation should produce stale.txt" + + # Second invocation: no file writes + result2 = await run_tool.invoke(arguments={"code": 'print("clean")\n'}) + outputs2 = result2 + stale_files = [ + item + for item in outputs2 + if item.type == "data" and "stale.txt" in (item.additional_properties or {}).get("path", "") + ] + assert not stale_files, "Stale output file leaked into second invocation" + + +@skip_if_hyperlight_integration_tests_disabled +async def test_run_code_does_not_block_event_loop() -> None: + """Verify _run_code uses asyncio.to_thread so the event loop stays responsive (comment 26).""" + _skip_if_hyperlight_integration_runtime_disabled() + + provider = HyperlightCodeActProvider() + context = _FakeSessionContext() + state: dict[str, Any] = {} + await provider.before_run(agent=object(), session=None, context=context, state=state) + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + + # Monkeypatch the registry.execute to block on an event, proving the event loop + # stays responsive while the worker thread is blocked. 
+ release = threading.Event() + async_started = asyncio.Event() + loop = asyncio.get_running_loop() + original_execute = run_tool._registry.execute + + def _blocking_execute(*, config, code): + loop.call_soon_threadsafe(async_started.set) + release.wait(timeout=10) + return original_execute(config=config, code=code) + + run_tool._registry.execute = _blocking_execute # type: ignore[assignment] + + concurrent_ran = False + + async def _concurrent_task(): + nonlocal concurrent_ran + await async_started.wait() + concurrent_ran = True + release.set() + + code_task = asyncio.create_task(run_tool.invoke(arguments={"code": 'print("done")\n'})) + await _concurrent_task() + result = await code_task + + assert concurrent_ran, "Event loop was blocked during sandbox execution" + assert result[0].type == "text" diff --git a/python/packages/openai/agent_framework_openai/_chat_client.py b/python/packages/openai/agent_framework_openai/_chat_client.py index 4aba988b39..5b7584dc6d 100644 --- a/python/packages/openai/agent_framework_openai/_chat_client.py +++ b/python/packages/openai/agent_framework_openai/_chat_client.py @@ -549,6 +549,7 @@ class RawOpenAIChatClient( # type: ignore[misc] chunk, options=validated_options, function_call_ids=function_call_ids, + seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids, ) else: async for chunk in await client.responses.create(stream=True, **run_options): @@ -556,6 +557,7 @@ class RawOpenAIChatClient( # type: ignore[misc] chunk, options=validated_options, function_call_ids=function_call_ids, + seen_reasoning_delta_item_ids=seen_reasoning_delta_item_ids, ) except Exception as ex: self._handle_request_error(ex) @@ -1587,6 +1589,54 @@ class RawOpenAIChatClient( # type: ignore[misc] """Join shell commands into a single executable command string.""" return "\n".join(command for command in commands if command).strip() + @staticmethod + def _serialize_provider_payload(value: Any) -> Any: + """Convert OpenAI SDK objects into JSON-serializable 
Python values.""" + if isinstance(value, BaseModel): + return value.model_dump(mode="json", exclude_none=True) + if isinstance(value, Mapping): + return {str(key): RawOpenAIChatClient._serialize_provider_payload(item) for key, item in value.items()} # type: ignore[reportUnknownVariableType] + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return [RawOpenAIChatClient._serialize_provider_payload(item) for item in value] # type: ignore[reportUnknownVariableType] + return value + + @staticmethod + def _get_search_tool_name(item_type: str) -> str: + """Map OpenAI search output item types to unified content tool names.""" + return "web_search" if item_type == "web_search_call" else "file_search" + + def _parse_search_tool_call_content(self, item: Any) -> Content: + """Create unified search tool call content from an OpenAI search output item.""" + item_type = getattr(item, "type", "") + call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or "" + if item_type == "web_search_call": + arguments = self._serialize_provider_payload(getattr(item, "action", None)) + else: + arguments = {"queries": list(getattr(item, "queries", []) or [])} + return Content.from_search_tool_call( + call_id=call_id, + tool_name=self._get_search_tool_name(item_type), + arguments=arguments, + status=getattr(item, "status", None), + raw_representation=item, + ) + + def _parse_search_tool_result_content(self, item: Any) -> Content: + """Create unified search tool result content from an OpenAI search output item.""" + item_type = getattr(item, "type", "") + call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or "" + if item_type == "web_search_call": + result = {"action": self._serialize_provider_payload(getattr(item, "action", None))} + else: + result = {"results": self._serialize_provider_payload(getattr(item, "results", None))} + return Content.from_search_tool_result( + call_id=call_id, + 
tool_name=self._get_search_tool_name(item_type), + result=result, + status=getattr(item, "status", None), + raw_representation=item, + ) + # region Parse methods def _parse_response_from_openai( self, @@ -1788,6 +1838,9 @@ class RawOpenAIChatClient( # type: ignore[misc] raw_representation=item, ) ) + case "web_search_call" | "file_search_call": + contents.append(self._parse_search_tool_call_content(item)) + contents.append(self._parse_search_tool_result_content(item)) case "mcp_approval_request": # ResponseOutputMcpApprovalRequest contents.append( Content.from_function_approval_request( @@ -2377,8 +2430,19 @@ class RawOpenAIChatClient( # type: ignore[misc] additional_properties=additional_properties_empty or None, ) ) + case "web_search_call" | "file_search_call": + contents.append(self._parse_search_tool_call_content(event_item)) case _: logger.debug("Unparsed event of type: %s: %s", event.type, event) + case ( + "response.web_search_call.in_progress" + | "response.web_search_call.searching" + | "response.web_search_call.completed" + | "response.file_search_call.in_progress" + | "response.file_search_call.searching" + | "response.file_search_call.completed" + ): + pass case "response.function_call_arguments.delta": call_id, name = function_call_ids.get(event.output_index, (None, None)) if call_id and name: @@ -2514,6 +2578,8 @@ class RawOpenAIChatClient( # type: ignore[misc] raw_representation=done_item, ) ) + elif getattr(done_item, "type", None) in ("web_search_call", "file_search_call"): + contents.append(self._parse_search_tool_result_content(done_item)) case _: logger.debug("Unparsed event of type: %s: %s", event.type, event) diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py index 4472a218bc..8c956a6339 100644 --- a/python/packages/openai/tests/openai/test_openai_chat_client.py +++ b/python/packages/openai/tests/openai/test_openai_chat_client.py @@ -7,7 +7,7 @@ import 
os from datetime import datetime, timezone from pathlib import Path from typing import Annotated, Any -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from agent_framework import ( @@ -71,6 +71,35 @@ class OutputStruct(BaseModel): weather: str | None = None +class _FakeAsyncEventStream: + def __init__(self, events: list[object]) -> None: + self._events = events + self._iterator = iter(()) + + def __aiter__(self) -> "_FakeAsyncEventStream": + self._iterator = iter(self._events) + return self + + async def __anext__(self) -> object: + try: + return next(self._iterator) + except StopIteration as exc: + raise StopAsyncIteration from exc + + +class _FakeAsyncEventStreamContext(_FakeAsyncEventStream): + async def __aenter__(self) -> "_FakeAsyncEventStreamContext": + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: object | None, + ) -> None: + return None + + async def create_vector_store( client: OpenAIChatClient, ) -> tuple[str, Content]: @@ -1250,6 +1279,91 @@ def test_response_content_creation_with_function_call() -> None: assert function_call.arguments == '{"location": "Seattle"}' +def test_parse_response_from_openai_with_web_search_call() -> None: + """Test _parse_response_from_openai with web search output.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + + mock_response = MagicMock() + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.id = "resp-web" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + + mock_search_item = MagicMock() + mock_search_item.type = "web_search_call" + mock_search_item.id = "ws_123" + mock_search_item.status = "completed" + mock_search_item.action = { + "type": "search", + "query": "current weather in Seattle", + "queries": ["current weather in Seattle"], + "sources": [{"title": 
"Weather", "url": "https://weather.example"}], + } + + mock_response.output = [mock_search_item] + + response = client._parse_response_from_openai(mock_response, options={}) # type: ignore + + assert len(response.messages[0].contents) == 2 + call_content, result_content = response.messages[0].contents + assert call_content.type == "search_tool_call" + assert call_content.call_id == "ws_123" + assert call_content.tool_name == "web_search" + assert call_content.status == "completed" + assert call_content.arguments == mock_search_item.action + assert result_content.type == "search_tool_result" + assert result_content.call_id == "ws_123" + assert result_content.tool_name == "web_search" + assert result_content.status == "completed" + assert result_content.result == {"action": mock_search_item.action} + + +def test_parse_response_from_openai_with_file_search_call() -> None: + """Test _parse_response_from_openai with file search output.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + + mock_response = MagicMock() + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.id = "resp-file" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + + mock_search_item = MagicMock() + mock_search_item.type = "file_search_call" + mock_search_item.id = "fs_123" + mock_search_item.status = "completed" + mock_search_item.queries = ["weather history"] + mock_search_item.results = [ + { + "file_id": "file_1", + "filename": "weather.txt", + "score": 0.9, + "text": "Seattle was cloudy.", + } + ] + + mock_response.output = [mock_search_item] + + response = client._parse_response_from_openai(mock_response, options={}) # type: ignore + + assert len(response.messages[0].contents) == 2 + call_content, result_content = response.messages[0].contents + assert call_content.type == "search_tool_call" + assert call_content.call_id == "fs_123" + assert call_content.tool_name == "file_search" + 
assert call_content.status == "completed" + assert call_content.arguments == {"queries": ["weather history"]} + assert result_content.type == "search_tool_result" + assert result_content.call_id == "fs_123" + assert result_content.tool_name == "file_search" + assert result_content.status == "completed" + assert result_content.result == {"results": mock_search_item.results} + + def test_prepare_content_for_opentool_approval_response() -> None: """Test _prepare_content_for_openai with function approval response content.""" client = OpenAIChatClient(model="test-model", api_key="test-key") @@ -1394,6 +1508,86 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None: assert result_content.output is not None +def test_parse_chunk_from_openai_with_web_search_call_added() -> None: + """Test that response.output_item.added for web_search_call emits search tool call content.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + chat_options = ChatOptions() + function_call_ids: dict[int, tuple[str, str]] = {} + + mock_event = MagicMock() + mock_event.type = "response.output_item.added" + mock_event.output_index = 0 + + mock_item = MagicMock() + mock_item.type = "web_search_call" + mock_item.id = "ws_call_123" + mock_item.status = "in_progress" + mock_item.action = {"type": "search", "query": "weather in Seattle"} + mock_event.item = mock_item + + update = client._parse_chunk_from_openai(mock_event, options=chat_options, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + content = update.contents[0] + assert content.type == "search_tool_call" + assert content.call_id == "ws_call_123" + assert content.tool_name == "web_search" + assert content.status == "in_progress" + assert content.arguments == {"type": "search", "query": "weather in Seattle"} + + +def test_parse_chunk_from_openai_with_file_search_call_done() -> None: + """Test that response.output_item.done for file_search_call emits search tool result 
content.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + chat_options = ChatOptions() + function_call_ids: dict[int, tuple[str, str]] = {} + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock() + mock_item.type = "file_search_call" + mock_item.id = "fs_call_123" + mock_item.status = "completed" + mock_item.results = [{"file_id": "file_1", "text": "Seattle was cloudy."}] + mock_event.item = mock_item + + update = client._parse_chunk_from_openai(mock_event, options=chat_options, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + content = update.contents[0] + assert content.type == "search_tool_result" + assert content.call_id == "fs_call_123" + assert content.tool_name == "file_search" + assert content.status == "completed" + assert content.result == {"results": [{"file_id": "file_1", "text": "Seattle was cloudy."}]} + + +@pytest.mark.parametrize( + "event_type", + [ + "response.web_search_call.in_progress", + "response.web_search_call.searching", + "response.web_search_call.completed", + "response.file_search_call.in_progress", + "response.file_search_call.searching", + "response.file_search_call.completed", + ], +) +def test_parse_chunk_from_openai_ignores_search_progress_events(event_type: str) -> None: + """Search progress events should be explicitly ignored instead of logged as unparsed.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + chat_options = ChatOptions() + function_call_ids: dict[int, tuple[str, str]] = {} + + mock_event = MagicMock() + mock_event.type = event_type + + update = client._parse_chunk_from_openai(mock_event, options=chat_options, function_call_ids=function_call_ids) + + assert update.contents == [] + + def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None: """Test that response.output_item.added for mcp_call emits only the call, not the result. 
@@ -2716,6 +2910,48 @@ async def test_get_response_streaming_with_response_format() -> None: await run_streaming() +async def test_inner_get_response_streaming_with_response_format_tracks_reasoning_delta_ids() -> None: + """The responses.stream path should suppress reasoning done events after deltas.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + messages = [Message(role="user", contents=["Test streaming with format"])] + item_id = "reasoning_stream" + events = [ + ResponseReasoningTextDeltaEvent( + type="response.reasoning_text.delta", + content_index=0, + item_id=item_id, + output_index=0, + sequence_number=1, + delta="Hello ", + ), + ResponseReasoningTextDoneEvent( + type="response.reasoning_text.done", + content_index=0, + item_id=item_id, + output_index=0, + sequence_number=2, + text="Hello ", + ), + ] + + with ( + patch.object( + client, + "_prepare_request", + new=AsyncMock(return_value=(client.client, {"text_format": OutputStruct}, {})), + ), + patch.object(client.client.responses, "stream", return_value=_FakeAsyncEventStreamContext(events)), + patch.object(client, "_get_metadata_from_response", return_value={}), + ): + stream = client._inner_get_response(messages=messages, options={}, stream=True) + updates = [update async for update in stream] + + reasoning_chunks = [ + content.text for update in updates for content in update.contents if content.type == "text_reasoning" + ] + assert reasoning_chunks == ["Hello "] + + def test_prepare_content_for_openai_image_content() -> None: """Test _prepare_content_for_openai with image content variations.""" client = OpenAIChatClient(model="test-model", api_key="test-key") @@ -3153,6 +3389,44 @@ def test_streaming_reasoning_deltas_then_done_no_duplication() -> None: assert "".join(c.text for c in all_contents) == "Hello world" +async def test_inner_get_response_streaming_create_tracks_reasoning_delta_ids() -> None: + """The responses.create(stream=True) path should suppress reasoning done 
events after deltas.""" + client = OpenAIChatClient(model="test-model", api_key="test-key") + messages = [Message(role="user", contents=["Test streaming"])] + item_id = "reasoning_create" + events = [ + ResponseReasoningTextDeltaEvent( + type="response.reasoning_text.delta", + content_index=0, + item_id=item_id, + output_index=0, + sequence_number=1, + delta="Hello ", + ), + ResponseReasoningTextDoneEvent( + type="response.reasoning_text.done", + content_index=0, + item_id=item_id, + output_index=0, + sequence_number=2, + text="Hello ", + ), + ] + + with ( + patch.object(client, "_prepare_request", new=AsyncMock(return_value=(client.client, {}, {}))), + patch.object(client.client.responses, "create", new=AsyncMock(return_value=_FakeAsyncEventStream(events))), + patch.object(client, "_get_metadata_from_response", return_value={}), + ): + stream = client._inner_get_response(messages=messages, options={}, stream=True) + updates = [update async for update in stream] + + reasoning_chunks = [ + content.text for update in updates for content in update.contents if content.type == "text_reasoning" + ] + assert reasoning_chunks == ["Hello "] + + def test_streaming_reasoning_events_preserve_metadata() -> None: """Test that reasoning events preserve metadata like regular text events.""" client = OpenAIChatClient(model="test-model", api_key="test-key") @@ -3890,26 +4164,22 @@ async def test_integration_tool_rich_content_image() -> None: client = OpenAIChatClient() client.function_invocation_configuration["max_iterations"] = 2 - for streaming in [False, True]: - messages = [ - Message( - role="user", - contents=["Call the get_test_image tool and describe what you see."], - ) - ] - options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"} + messages = [ + Message( + role="user", + contents=["Call the get_test_image tool and describe what you see."], + ) + ] + options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"} - if streaming: - response 
= await client.get_response(messages=messages, stream=True, options=options).get_final_response() - else: - response = await client.get_response(messages=messages, options=options) + response = await client.get_response(messages=messages, stream=True, options=options).get_final_response() - assert response is not None - assert isinstance(response, ChatResponse) - assert response.text is not None - assert len(response.text) > 0 - # sample_image.jpg contains a photo of a house; the model should mention it. - assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}" + assert response is not None + assert isinstance(response, ChatResponse) + assert response.text is not None + assert len(response.text) > 0 + # sample_image.jpg contains a photo of a house; the model should mention it. + assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}" @pytest.mark.flaky diff --git a/python/packages/openai/tests/openai/test_openai_chat_client_azure.py b/python/packages/openai/tests/openai/test_openai_chat_client_azure.py index 4bec80f6b7..b16fbd0f7f 100644 --- a/python/packages/openai/tests/openai/test_openai_chat_client_azure.py +++ b/python/packages/openai/tests/openai/test_openai_chat_client_azure.py @@ -486,6 +486,7 @@ async def test_integration_client_agent_existing_session() -> None: @pytest.mark.integration @skip_if_azure_openai_integration_tests_disabled @_with_azure_openai_debug() +@pytest.mark.skip(reason="Azure OpenAI is flaky when handling image content as function result. 
Needs investigation.") async def test_azure_openai_chat_client_tool_rich_content_image() -> None: image_path = Path(__file__).parent.parent / "assets" / "sample_image.jpg" image_bytes = image_path.read_bytes() @@ -499,21 +500,12 @@ async def test_azure_openai_chat_client_tool_rich_content_image() -> None: client = OpenAIChatClient(credential=credential) client.function_invocation_configuration["max_iterations"] = 2 - for streaming in [False, True]: - messages = [Message(role="user", contents=["Call the get_test_image tool and describe what you see."])] - options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"} + response = await client.get_response( + messages=[Message(role="user", contents=["Call the get_test_image tool and describe what you see."])], + stream=True, + options={"tools": [get_test_image], "tool_choice": "auto"}, + ).get_final_response() - if streaming: - response = await client.get_response( - messages=messages, - stream=True, - options=options, - ).get_final_response() - else: - response = await client.get_response(messages=messages, options=options) - - assert isinstance(response, ChatResponse) - assert response.text is not None - assert "house" in response.text.lower(), ( - f"Model did not describe the house image. Response: {response.text}" - ) + assert isinstance(response, ChatResponse) + assert response.text is not None + assert "house" in response.text.lower(), f"Model did not describe the house image. 
 Response: {response.text}" diff --git a/python/pyproject.toml b/python/pyproject.toml index 252fc024f8..2e79ae4fec 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -83,6 +83,7 @@ agent-framework-foundry-hosting = { workspace = true } agent-framework-foundry-local = { workspace = true } agent-framework-gemini = { workspace = true } agent-framework-github-copilot = { workspace = true } +agent-framework-hyperlight = { workspace = true } agent-framework-lab = { workspace = true } agent-framework-mem0 = { workspace = true } agent-framework-ollama = { workspace = true } diff --git a/python/samples/02-agents/context_providers/README.md b/python/samples/02-agents/context_providers/README.md index 04f3a1395f..7c34e10518 100644 --- a/python/samples/02-agents/context_providers/README.md +++ b/python/samples/02-agents/context_providers/README.md @@ -7,6 +7,7 @@ These samples demonstrate how to use context providers to enrich agent conversat | File / Folder | Description | |---------------|-------------| | [`simple_context_provider.py`](simple_context_provider.py) | Implement a custom context provider by extending `ContextProvider` to extract and inject structured user information across turns. | +| [`foundry_toolbox_context_provider.py`](foundry_toolbox_context_provider.py) | Compose a Microsoft Foundry toolbox with a `ContextProvider` that caches the toolbox once and picks a subset of its tools per-turn via `select_toolbox_tools`, driven by a model-based router that reads the latest user message. | | [`azure_ai_foundry_memory.py`](azure_ai_foundry_memory.py) | Use `FoundryMemoryProvider` to add semantic memory — automatically retrieves, searches, and stores memories via Azure AI Foundry. | | [`azure_ai_search/`](azure_ai_search/) | Retrieval Augmented Generation (RAG) with Azure AI Search in semantic and agentic modes. See its own [README](azure_ai_search/README.md). | | [`mem0/`](mem0/) | Memory-powered context using the Mem0 integration (open-source and managed). 
See its own [README](mem0/README.md). | @@ -19,6 +20,12 @@ These samples demonstrate how to use context providers to enrich agent conversat - `FOUNDRY_MODEL`: Model deployment name - Azure CLI authentication (`az login`) +**For `foundry_toolbox_context_provider.py`:** +- `FOUNDRY_PROJECT_ENDPOINT`: Your Microsoft Foundry project endpoint +- `FOUNDRY_MODEL`: Model deployment name +- A toolbox already configured in that project; set `TOOLBOX_NAME` / `TOOLBOX_VERSION` at the top of the sample +- Azure CLI authentication (`az login`) + **For `azure_ai_foundry_memory.py`:** - `FOUNDRY_PROJECT_ENDPOINT`: Your Azure AI Foundry project endpoint - `FOUNDRY_MODEL`: Chat/responses model deployment name diff --git a/python/samples/02-agents/context_providers/azure_ai_search/README.md b/python/samples/02-agents/context_providers/azure_ai_search/README.md index 9e5f6c03f2..2e32819003 100644 --- a/python/samples/02-agents/context_providers/azure_ai_search/README.md +++ b/python/samples/02-agents/context_providers/azure_ai_search/README.md @@ -8,7 +8,7 @@ This folder contains examples demonstrating how to use the Azure AI Search conte | File | Description | |------|-------------| -| [`search_context_agentic.py`](search_context_agentic.py) | **Agentic mode** (recommended for most scenarios): Uses Knowledge Bases in Azure AI Search for query planning and multi-hop reasoning. Provides more accurate results through intelligent retrieval with automatic query reformulation. Slightly slower with more token consumption for query planning. [Learn more](https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/foundry-iq-boost-response-relevance-by-36-with-agentic-retrieval/4470720) | +| [`search_context_agentic.py`](search_context_agentic.py) | **Agentic mode** (recommended for most scenarios): Uses Knowledge Bases in Azure AI Search for query planning and multi-hop reasoning. Provides more accurate results through intelligent retrieval with automatic query reformulation. 
Slightly slower with more token consumption for query planning. [Learn more](https://learn.microsoft.com/azure/search/agentic-retrieval-overview) | | [`search_context_semantic.py`](search_context_semantic.py) | **Semantic mode** (fast queries): Fast hybrid search combining vector and keyword search with semantic ranking. Returns raw search results as context. Best for scenarios where speed is critical and simple retrieval is sufficient. | ## Installation @@ -265,4 +265,4 @@ async with Agent( - [RAG with Azure AI Search](https://learn.microsoft.com/azure/search/retrieval-augmented-generation-overview) - [Semantic Search in Azure AI Search](https://learn.microsoft.com/azure/search/semantic-search-overview) - [Knowledge Bases in Azure AI Search](https://learn.microsoft.com/azure/search/knowledge-store-concept-intro) -- [Agentic Retrieval Blog Post](https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/foundry-iq-boost-response-relevance-by-36-with-agentic-retrieval/4470720) +- [Agentic Retrieval in Azure AI Search](https://learn.microsoft.com/azure/search/agentic-retrieval-overview) diff --git a/python/samples/02-agents/context_providers/foundry_toolbox_context_provider.py b/python/samples/02-agents/context_providers/foundry_toolbox_context_provider.py new file mode 100644 index 0000000000..d889c7c1ac --- /dev/null +++ b/python/samples/02-agents/context_providers/foundry_toolbox_context_provider.py @@ -0,0 +1,207 @@ +# Copyright (c) Microsoft. All rights reserved. 
+
+import asyncio
+import os
+from typing import Any
+
+from agent_framework import Agent, AgentSession, ContextProvider, Message, SessionContext
+from agent_framework.foundry import (
+    FoundryChatClient,
+    get_toolbox_tool_name,
+    get_toolbox_tool_type,
+    select_toolbox_tools,
+)
+from azure.identity import AzureCliCredential
+from dotenv import load_dotenv
+from pydantic import BaseModel
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+Foundry Toolbox + Context Provider Example
+
+This sample composes a Foundry toolbox with a ContextProvider so the agent's
+tool list is chosen dynamically per-turn. It uses the chat client itself as a lightweight "tool router": the
+latest user message plus a short menu of toolbox tools is sent to the model
+with a Pydantic ``response_format``, and the returned tool names drive
+``select_toolbox_tools``. The toolbox is fetched once and cached on the
+provider's state dict; subsequent turns reuse the cache.
+
+Prerequisites:
+- A Microsoft Foundry project
+- A toolbox already configured in that project (set TOOLBOX_NAME below)
+- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
+- Azure CLI authentication (`az login`)
+"""
+
+# Replace with your own Foundry toolbox name and version.
+TOOLBOX_NAME = "research_toolbox"
+# Set to None to resolve the toolbox's current default version at fetch time.
+TOOLBOX_VERSION: str | None = None
+
+# Generic queries that exercise the router without assuming any specific tool
+# types are configured. The first is introspective, the second forces a
+# non-empty pick for whichever tools the toolbox actually contains, and the
+# third should route to nothing (the provider then surfaces every tool).
+QUERIES: list[str] = [
+    "Introduce yourself and briefly describe the tools you can use to help me.",
+    "Pick the tool you think is most useful and demonstrate it with a short example.",
+    "Say hi in one short sentence - no tools needed.",
+]
+
+
+def create_sample_toolbox(name: str) -> str:
+    """Create (or replace) a toolbox version in the Foundry project.
+
+    Toolboxes are normally configured in the Foundry portal or a deployment
+    script, not the application itself. This helper exists so the sample can
+    be run end-to-end without first setting a toolbox up by hand — delete any
+    existing toolbox under ``name``, then create a fresh version containing a
+    single MCP tool. Returns the created version identifier.
+    """
+    from azure.ai.projects import AIProjectClient
+    from azure.ai.projects.models import MCPTool, Tool
+    from azure.core.exceptions import ResourceNotFoundError
+
+    with (
+        AzureCliCredential() as credential,
+        AIProjectClient(credential=credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as project_client,
+    ):
+        try:
+            project_client.beta.toolboxes.delete(name)
+            print(f"Toolbox `{name}` deleted")
+        except ResourceNotFoundError:  # no toolbox under this name yet - nothing to replace
+            pass
+
+        tools: list[Tool] = [
+            MCPTool(
+                server_label="api_specs",
+                server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
+                require_approval="never",
+            )
+        ]
+
+        created = project_client.beta.toolboxes.create_version(
+            name=name,
+            description="Toolbox version with MCP require_approval set to 'never'.",
+            tools=tools,
+        )
+        print(f"Created toolbox {created.name}@{created.version} ({len(created.tools)} tool(s))")
+        return created.version
+
+
+class ToolSelection(BaseModel):
+    """Structured output for the per-turn tool router."""
+
+    tool_names: list[str]
+
+
+ROUTER_INSTRUCTIONS = (
+    "You are a tool router. Given the user's latest message and a menu of "
+    "available tools (one per line, formatted as 'NAME - TYPE'), return the "
+    "NAMES of the tools that would plausibly help answer the message. Return "
+    "an empty list if no tool is needed."
+)
+
+
+class DynamicToolboxProvider(ContextProvider):
+    """Fetches a Foundry toolbox once and lets the model pick tools per-turn."""
+
+    DEFAULT_SOURCE_ID = "foundry_toolbox"
+
+    def __init__(
+        self,
+        source_id: str = DEFAULT_SOURCE_ID,
+        *,
+        client: FoundryChatClient,
+        toolbox_name: str,
+        toolbox_version: str | None = None,
+    ) -> None:
+        super().__init__(source_id)
+        self._client = client
+        self._toolbox_name = toolbox_name
+        self._toolbox_version = toolbox_version
+
+    async def before_run(
+        self,
+        *,
+        agent: Any,
+        session: AgentSession | None,
+        context: SessionContext,
+        state: dict[str, Any],
+    ) -> None:
+        """Cache the toolbox on first call, then let the model pick tools per-turn."""
+        toolbox = state.get("toolbox")
+        if toolbox is None:
+            toolbox = await self._client.get_toolbox(self._toolbox_name, version=self._toolbox_version)
+            state["toolbox"] = toolbox
+            print(f"[{self.source_id}] Loaded toolbox {toolbox.name}@{toolbox.version} ({len(toolbox.tools)} tool(s))")
+
+        user_messages = [m for m in context.get_messages(include_input=True) if getattr(m, "role", None) == "user"]
+        if not user_messages:  # no user turn to route on - surface the whole toolbox
+            context.extend_tools(self.source_id, list(toolbox.tools))
+            return
+
+        picks = await self._route_tools(user_messages[-1].text, toolbox.tools)
+        if picks:
+            tools = select_toolbox_tools(toolbox, include_names=picks)
+            print(f"[{self.source_id}] Router picked {sorted(picks)} - surfacing {len(tools)} tool(s)")
+        else:  # an empty pick falls back to the full toolbox instead of hiding every tool
+            tools = list(toolbox.tools)
+            print(f"[{self.source_id}] Router picked nothing - surfacing all {len(tools)} tool(s)")
+        context.extend_tools(self.source_id, tools)
+
+    async def _route_tools(self, user_text: str, tools: Any) -> list[str]:
+        """Ask the model which toolbox tools to surface for this turn ([] if it returns no structured pick)."""
+        menu = "\n".join(f"- {get_toolbox_tool_name(t)} - {get_toolbox_tool_type(t)}" for t in tools)
+        prompt = (
+            f"User message:\n{user_text}\n\n"
+            f"Available tools:\n{menu}\n\n"
+            "Return the names of tools that should be surfaced for this turn."
+        )
+        response = await self._client.get_response(
+            messages=[Message("user", [prompt])],
+            options={
+                "instructions": ROUTER_INSTRUCTIONS,
+                "response_format": ToolSelection,
+            },
+        )
+        selection: ToolSelection | None = response.value  # type: ignore
+        return selection.tool_names if selection is not None else []
+
+
+async def main() -> None:
+    client = FoundryChatClient(
+        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
+        model=os.environ["FOUNDRY_MODEL"],
+        credential=AzureCliCredential(),
+    )
+
+    # Comment out if the toolbox already exists in your Foundry project.
+    create_sample_toolbox(TOOLBOX_NAME)
+
+    toolbox_provider = DynamicToolboxProvider(
+        client=client,
+        toolbox_name=TOOLBOX_NAME,
+        toolbox_version=TOOLBOX_VERSION,
+    )
+
+    async with Agent(
+        client=client,
+        instructions=(
+            "You are a helpful assistant. Use the tools available to you on each "
+            "turn to answer the user. If no tools are relevant, reply directly."
+        ),
+        context_providers=[toolbox_provider],
+    ) as agent:
+        session = agent.create_session()
+
+        for query in QUERIES:
+            print(f"\nUser: {query}")
+            result = await agent.run(query, session=session)
+            print(f"Assistant: {result}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/02-agents/providers/foundry/README.md b/python/samples/02-agents/providers/foundry/README.md
index 2025b0f4fe..120c4d9a1c 100644
--- a/python/samples/02-agents/providers/foundry/README.md
+++ b/python/samples/02-agents/providers/foundry/README.md
@@ -26,6 +26,8 @@ This folder contains Azure AI Foundry and Foundry Local samples for Agent Framew
 | [`foundry_chat_client_with_hosted_mcp.py`](foundry_chat_client_with_hosted_mcp.py) | Foundry Chat Client with hosted MCP |
 | [`foundry_chat_client_with_local_mcp.py`](foundry_chat_client_with_local_mcp.py) | Foundry Chat Client with local MCP |
 | [`foundry_chat_client_with_session.py`](foundry_chat_client_with_session.py) | Foundry Chat Client with session management |
+| [`foundry_chat_client_with_toolbox.py`](foundry_chat_client_with_toolbox.py) | Foundry Chat Client with Foundry toolbox loading, multi-toolbox composition, and tool subset selection |
+| [`foundry_chat_client_with_toolbox_mcp.py`](foundry_chat_client_with_toolbox_mcp.py) | Foundry Chat Client connected to a toolbox via its MCP endpoint using `MCPStreamableHTTPTool` |
 
 ## FoundryLocalClient Samples
 
diff --git a/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py b/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py
new file mode 100644
index 0000000000..8a532331ae
--- /dev/null
+++ b/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox.py
@@ -0,0 +1,174 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import os
+
+from agent_framework import Agent
+from agent_framework.foundry import FoundryChatClient, select_toolbox_tools
+from azure.identity import AzureCliCredential
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+Foundry Chat Client with Toolbox Example
+
+This sample demonstrates loading a named, versioned Foundry toolbox into an
+Agent via ``FoundryChatClient.get_toolbox()``. A toolbox is a server-side
+bundle of tool configurations (code interpreter, file search, MCP, web search,
+etc.) configured in the Foundry portal or via the raw SDK.
+
+Prerequisites:
+- A Microsoft Foundry project
+- A toolbox already configured in that project (set TOOLBOX_NAME below)
+- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
+"""
+
+# Replace with your own Foundry toolbox name and version.
+TOOLBOX_NAME = "research_toolbox"
+TOOLBOX_VERSION = "1"
+# Used only by combine_toolboxes() — swap in a second toolbox you own.
+SECOND_TOOLBOX_NAME = "analysis_toolbox"
+SECOND_TOOLBOX_VERSION = "1"
+
+# Replace with any question that exercises the tools configured in your toolbox.
+QUERY = "Introduce yourself and briefly describe the tools you can use to help me."
+
+
+def create_sample_toolbox(name: str) -> str:
+    """Provision a fresh toolbox version named ``name`` in the Foundry project.
+
+    Real deployments set toolboxes up in the Foundry portal or a deployment
+    script rather than in application code; this helper only lets the samples
+    run end-to-end without manual setup. Any existing toolbox under ``name``
+    is deleted first, then a new version holding a single MCP tool is
+    created. The identifier of that new version is returned.
+    """
+    from azure.ai.projects import AIProjectClient
+    from azure.ai.projects.models import MCPTool, Tool
+    from azure.core.exceptions import ResourceNotFoundError
+
+    with (
+        AzureCliCredential() as cli_credential,
+        AIProjectClient(credential=cli_credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as projects,
+    ):
+        try:
+            projects.beta.toolboxes.delete(name)
+            print(f"Toolbox `{name}` deleted")
+        except ResourceNotFoundError:
+            pass
+
+        sample_tools: list[Tool] = [
+            MCPTool(
+                server_label="api_specs",
+                server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
+                require_approval="never",
+            )
+        ]
+
+        new_version = projects.beta.toolboxes.create_version(
+            name=name,
+            description="Toolbox version with MCP require_approval set to 'never'.",
+            tools=sample_tools,
+        )
+        print(f"Created toolbox {new_version.name}@{new_version.version} ({len(new_version.tools)} tool(s))")
+        return new_version.version
+
+
+async def main() -> None:
+    """Run one query against an agent equipped with a single Foundry toolbox."""
+    print("=== Foundry Chat Client with Toolbox Example ===")
+
+    # Authenticate by running `az login` first, or swap AzureCliCredential
+    # for whichever credential type suits your environment.
+    client = FoundryChatClient(
+        credential=AzureCliCredential(),
+        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
+        model=os.environ["FOUNDRY_MODEL"],
+    )
+
+    # Skip this call when the toolbox is already present in your Foundry project.
+    create_sample_toolbox(TOOLBOX_NAME)
+
+    # No ``version`` argument: the toolbox's current default version is resolved at runtime.
+    loaded = await client.get_toolbox(TOOLBOX_NAME)
+    print(f"Loaded toolbox {loaded.name}@{loaded.version} ({len(loaded.tools)} tool(s))")
+
+    agent = Agent(
+        client=client,
+        instructions="You are a research assistant. Use the available tools to answer questions.",
+        tools=loaded,
+    )
+
+    print(f"User: {QUERY}")
+    reply = await agent.run(QUERY)
+    print(f"Result: {reply}\n")
+
+
+async def combine_toolboxes() -> None:
+    """Alternative flow: give one agent the tools of several Foundry toolboxes."""
+    client = FoundryChatClient(
+        credential=AzureCliCredential(),
+        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
+        model=os.environ["FOUNDRY_MODEL"],
+    )
+
+    # Skip these calls when both toolboxes are already present in your Foundry project.
+    create_sample_toolbox(TOOLBOX_NAME)
+    create_sample_toolbox(SECOND_TOOLBOX_NAME)
+
+    first = await client.get_toolbox(TOOLBOX_NAME, version=TOOLBOX_VERSION)
+    second = await client.get_toolbox(SECOND_TOOLBOX_NAME, version=SECOND_TOOLBOX_VERSION)
+    print(
+        "Loaded toolboxes: "
+        f"{first.name}@{first.version} ({len(first.tools)} tool(s)), "
+        f"{second.name}@{second.version} ({len(second.tools)} tool(s))"
+    )
+
+    agent = Agent(
+        client=client,
+        instructions="You are a research assistant. Use all available tools to answer questions.",
+        tools=[first, second],
+    )
+
+    print(f"User: {QUERY}")
+    reply = await agent.run(QUERY)
+    print(f"Combined-toolbox result: {reply}\n")
+
+
+async def select_tools_from_toolbox() -> None:
+    """Alternative flow: hand the agent only a filtered subset of one toolbox's tools."""
+    client = FoundryChatClient(
+        credential=AzureCliCredential(),
+        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
+        model=os.environ["FOUNDRY_MODEL"],
+    )
+
+    # Skip this call when the toolbox is already present in your Foundry project.
+    create_sample_toolbox(TOOLBOX_NAME)
+
+    loaded = await client.get_toolbox(TOOLBOX_NAME, version=TOOLBOX_VERSION)
+    print(f"Loaded toolbox {loaded.name}@{loaded.version} ({len(loaded.tools)} tool(s))")
+
+    subset = select_toolbox_tools(
+        loaded,
+        include_types=["code_interpreter", "mcp"],
+    )
+    print(f"Selected {len(subset)} toolbox tools for the agent")
+
+    agent = Agent(
+        client=client,
+        instructions="You are a research assistant. Use only the selected toolbox tools.",
+        tools=subset,
+    )
+
+    print(f"User: {QUERY}")
+    reply = await agent.run(QUERY)
+    print(f"Selected-toolbox result: {reply}\n")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+    # asyncio.run(combine_toolboxes())
+    # asyncio.run(select_tools_from_toolbox())
diff --git a/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox_mcp.py b/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox_mcp.py
new file mode 100644
index 0000000000..1fbfe20a9a
--- /dev/null
+++ b/python/samples/02-agents/providers/foundry/foundry_chat_client_with_toolbox_mcp.py
@@ -0,0 +1,118 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import os
+from collections.abc import Callable
+from typing import Any
+
+from agent_framework import Agent, MCPStreamableHTTPTool
+from agent_framework.foundry import FoundryChatClient
+from azure.core.credentials import TokenCredential
+from azure.identity import AzureCliCredential, DefaultAzureCredential, get_bearer_token_provider
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+Foundry Toolbox via MAF ``MCPStreamableHTTPTool``
+
+Instead of fetching the toolbox and fanning out individual tool specs, point
+MAF's ``MCPStreamableHTTPTool`` at the toolbox's MCP endpoint. The agent
+discovers and calls the toolbox's tools over MCP at runtime.
+
+Prerequisites:
+- A Microsoft Foundry project with a toolbox configured
+- FOUNDRY_PROJECT_ENDPOINT and FOUNDRY_MODEL environment variables set
+- FOUNDRY_TOOLBOX_ENDPOINT: the toolbox's MCP endpoint URL, e.g.
+  ``https://<resource>.services.ai.azure.com/api/projects/<project>/toolsets/<toolbox-name>/mcp?api-version=v1``
+- Azure CLI authentication (``az login``)
+"""
+
+# Must match the ``<toolbox-name>`` segment of FOUNDRY_TOOLBOX_ENDPOINT.
+TOOLBOX_NAME = "research_toolbox"
+
+
+def create_sample_toolbox(name: str) -> str:
+    """Create (or replace) a toolbox version in the Foundry project.
+
+    Toolboxes are normally configured in the Foundry portal or a deployment
+    script, not the application itself. This helper exists so the sample can
+    be run end-to-end without first setting a toolbox up by hand — delete any
+    existing toolbox under ``name``, then create a fresh version containing a
+    single MCP tool. Returns the created version identifier.
+    """
+    from azure.ai.projects import AIProjectClient
+    from azure.ai.projects.models import MCPTool, Tool
+    from azure.core.exceptions import ResourceNotFoundError
+
+    with (
+        AzureCliCredential() as credential,
+        AIProjectClient(credential=credential, endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"]) as project_client,
+    ):
+        try:
+            project_client.beta.toolboxes.delete(name)
+            print(f"Toolbox `{name}` deleted")
+        except ResourceNotFoundError:  # no toolbox under this name yet - nothing to replace
+            pass
+
+        tools: list[Tool] = [
+            MCPTool(
+                server_label="api_specs",
+                server_url="https://gitmcp.io/Azure/azure-rest-api-specs",
+                require_approval="never",
+            )
+        ]
+
+        created = project_client.beta.toolboxes.create_version(
+            name=name,
+            description="Toolbox version with MCP require_approval set to 'never'.",
+            tools=tools,
+        )
+        print(f"Created toolbox {created.name}@{created.version} ({len(created.tools)} tool(s))")
+        return created.version
+
+
+def make_toolbox_header_provider(credential: TokenCredential) -> Callable[[dict[str, Any]], dict[str, str]]:
+    """Build a header_provider that injects a fresh Azure AI bearer token on every MCP request."""
+    get_token = get_bearer_token_provider(credential, "https://ai.azure.com/.default")
+
+    def provide(_kwargs: dict[str, Any]) -> dict[str, str]:  # the kwargs argument is accepted but unused
+        return {
+            "Authorization": f"Bearer {get_token()}",
+        }
+
+    return provide
+
+
+async def main() -> None:
+    credential = DefaultAzureCredential()  # shared by the MCP header provider and the chat client below
+
+    # Comment out if the toolbox already exists in your Foundry project.
+    create_sample_toolbox(TOOLBOX_NAME)
+
+    toolbox_tool = MCPStreamableHTTPTool(
+        name="foundry_toolbox",
+        description="Tools exposed by the configured Foundry toolbox",
+        url=os.environ["FOUNDRY_TOOLBOX_ENDPOINT"],
+        header_provider=make_toolbox_header_provider(credential),
+        load_prompts=False,
+    )
+
+    async with Agent(
+        client=FoundryChatClient(
+            project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
+            model=os.environ["FOUNDRY_MODEL"],
+            credential=credential,
+        ),
+        instructions="You are a helpful assistant. Use the available toolbox tools to answer the user.",
+        tools=toolbox_tool,
+    ) as agent:
+        query = "What tools do you have access to?"
+        print(f"User: {query}")
+        result = await agent.run(query)
+        print(f"Assistant: {result}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/uv.lock b/python/uv.lock
index 5978f46c3a..9a680f9231 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -46,6 +46,7 @@ members = [
     "agent-framework-foundry-local",
     "agent-framework-gemini",
     "agent-framework-github-copilot",
+    "agent-framework-hyperlight",
     "agent-framework-lab",
     "agent-framework-mem0",
     "agent-framework-ollama",
@@ -496,7 +497,7 @@ requires-dist = [
     { name = "agent-framework-core", editable = "packages/core" },
     { name = "agent-framework-openai", editable = "packages/openai" },
     { name = "azure-ai-inference", specifier = ">=1.0.0b9,<1.0.0b10" },
-    { name = "azure-ai-projects", specifier = ">=2.0.0,<3.0" },
+    { name = "azure-ai-projects", specifier = ">=2.1.0,<3.0" },
 ]
 
 [[package]]
@@ -565,6 +566,25 @@ requires-dist = [
     { name = "github-copilot-sdk", marker = "python_full_version >= '3.11'", specifier = ">=0.2.1,<=0.2.1" },
 ]
 
+[[package]]
+name = "agent-framework-hyperlight"
+version = "1.0.0a260409"
+source = { editable = "packages/hyperlight" }
+dependencies = [
+    { name = "agent-framework-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "hyperlight-sandbox", marker =
"sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, + { name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')" }, + { name = "hyperlight-sandbox-python-guest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, +] + +[package.metadata] +requires-dist = [ + { name = "agent-framework-core", editable = "packages/core" }, + { name = "hyperlight-sandbox", specifier = ">=0.3.0,<0.4" }, + { name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')", specifier = ">=0.3.0,<0.4" }, + { name = "hyperlight-sandbox-python-guest", specifier = ">=0.3.0,<0.4" }, +] + [[package]] name = "agent-framework-lab" version = "1.0.0b260409" @@ -1092,7 +1112,7 @@ wheels = [ [[package]] name = "azure-ai-projects" -version = "2.0.1" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -1102,9 +1122,9 @@ dependencies = [ { name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/86/f9/a15c8a16e35e6d620faebabc6cc4f9e2f4b7f1d962cc6f58931c46947e24/azure_ai_projects-2.0.1.tar.gz", hash = "sha256:c8c64870aa6b89903af69a4ff28b4eff3df9744f14615ea572cae87394946a0c", size = 491774, upload-time = "2026-03-12T19:59:02.712Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/72/76/3fdede8eddfe5927a571898a15f0288ba30fee78e5ba099f88df3ded70af/azure_ai_projects-2.1.0.tar.gz", hash = "sha256:f0749fa9a174255aa1a5550fb6078208521518472907a4c6dd552767d9b39caa", size = 543343, upload-time = "2026-04-20T17:06:48.751Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/f7/290ca39501c06c6e23b46ba9f7f3dfb05ecc928cde105fed85d6845060dd/azure_ai_projects-2.0.1-py3-none-any.whl", hash = "sha256:dfda540d256e67a52bf81c75418b6bf92b811b96693fe45787e154a888ad2396", size = 236560, upload-time = "2026-03-12T19:59:04.249Z" }, + { url = "https://files.pythonhosted.org/packages/f7/f6/4984e7772a97c7a9e6505a3de8e55a5070fa2b02cd7e980da91e0d9b9b97/azure_ai_projects-2.1.0-py3-none-any.whl", hash = "sha256:6f259d8eb9167d2dfd372006d0221a8118faeaeb05829fa898b595bc6f19c699", size = 274309, upload-time = "2026-04-20T17:06:50.542Z" }, ] [[package]] @@ -2487,7 +2507,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -2495,7 +2514,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = 
"https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -2504,7 +2522,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = 
"https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -2513,7 +2530,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = 
"https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -2522,7 +2538,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = 
"https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -2531,7 +2546,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = 
"https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -2825,6 +2839,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] +[[package]] +name = "hyperlight-sandbox" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/fe/ce88996ea3e3e05130d6f0e8cd2ffbe9ab9bf3d9448b7050d4b8d0802b0a/hyperlight_sandbox-0.3.0.tar.gz", hash = "sha256:00491ce267ffbdb206377c79b4afd86510177ad73f4daf2ef7fce02b54eaf801", size = 9251, upload-time = "2026-04-07T03:49:52.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/33/e6dcd6729308d13570ae2d3be0e476019a6f3fea387d7549bb1f77ce0408/hyperlight_sandbox-0.3.0-py3-none-any.whl", hash = "sha256:ba8e6779d64e9c187acd93456851ebafaed2f49380e5d132bc0906a4080d2217", size = 5723, upload-time = 
"2026-04-07T03:49:53.276Z" }, +] + +[[package]] +name = "hyperlight-sandbox-backend-wasm" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/91/c9d68cad7996fdd2f1facef1453156bdd8d52eefa976cc8c827c13029497/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:eda362f5f737b0823326290d7627c76ce0547a78e70f07f8c9d177e34622fc02", size = 3806454, upload-time = "2026-04-07T03:49:24.238Z" }, + { url = "https://files.pythonhosted.org/packages/9a/6f/6b2399a1caf59dd19b635d99ee1add0c975af7bc3317f5d0f1f9c3f90aa0/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:79347b7ae94f2786691b04cb52130dabc5991e0c03b42a24bad8adc766832655", size = 3283951, upload-time = "2026-04-07T03:49:17.137Z" }, + { url = "https://files.pythonhosted.org/packages/23/f2/b380c34a0ce8d486a05adb66757f98cca029e1fb1c96b1c29be0d25d3882/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:aff9eec4803fb535a140298e2632529f4150fcf3c6ea3ff2ae4571572a836116", size = 3806601, upload-time = "2026-04-07T03:49:22.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5a/fb78cfd934e0523887b8d5b073b7b2aed3b545add21cda3aa95929ac1659/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:b6151704dd19862c9869b115752b4504b45d0b2eeb46aa9385a1a3b8be11cfa8", size = 3284164, upload-time = "2026-04-07T03:49:18.556Z" }, + { url = "https://files.pythonhosted.org/packages/21/bc/4e21f5c7ccd9307ac63a61c71b62a57ee4a9e6eec77fc72ff072907a21f5/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:cfd1d22ce221774d82a5174d268d56ff70fc1a23fb993a6491358b5d0ed169bf", size = 3802901, upload-time = "2026-04-07T03:49:19.845Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/41/646be9b0c7bb0f9192e45a77414673aa414eb316c92b5312efe6fb4ce802/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:229ab494a422f2de895a2a27ad6a6a2daed710ea062d7c213878bbe5f5b32fa7", size = 3281220, upload-time = "2026-04-07T03:49:21.368Z" }, + { url = "https://files.pythonhosted.org/packages/74/3a/f8ec4a41fffba4036dfc3cbddc3dfb6e87466b01afe1cb0a50cc6a0f0eed/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b91905ee2ddd36a78b0dd13b1a62be99a995a45121587c111692591e40b36912", size = 3802789, upload-time = "2026-04-07T03:49:15.614Z" }, + { url = "https://files.pythonhosted.org/packages/3c/62/dfa8c15102f9b8ec5c3b5ffb54b99d60c75e7a6e4d00540757656bc5a5d8/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:eff682761c3b86abfe7e0d523ea0e6d5c7e8299302917c53918743b82c9d1ea2", size = 3280501, upload-time = "2026-04-07T03:49:13.939Z" }, +] + +[[package]] +name = "hyperlight-sandbox-python-guest" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/23/6a/f182c4315d31a98dd3b82f9274638e3adb399779584af93c5087bb2f814f/hyperlight_sandbox_python_guest-0.3.0.tar.gz", hash = "sha256:b1de5d8e87375dc6bef744ecd7ae2a7f43d5f6b913b4e990e9872bd439c0b19e", size = 21554625, upload-time = "2026-04-07T03:49:42.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/8e/4cd754928464f56528645c7421ccbb3fcbe45ad2542f899712b0f2f2c0e1/hyperlight_sandbox_python_guest-0.3.0-py3-none-any.whl", hash = "sha256:3c55a7420666ad9a208893dbdf7ad1b5c8ad4f3a94b1a56e64979719c7ce95c1", size = 21716481, upload-time = "2026-04-07T03:49:39.885Z" }, +] + [[package]] name = "idna" version = "3.11"