mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
c79f886dc3
* dotnet: refresh Foundry sample guidance Carry forward the still-relevant sample guidance and Foundry-specific documentation fixes from the old stacked sample migration work, adapted to the current repo layout and policy. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * dotnet: rename Foundry sample env vars Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * dotnet: remove persistent provider sample Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * dotnet: drop SAMPLE_GUIDELINES.md from this PR Defer the guidelines doc and its cross-link to a follow-on PR to avoid broken-link failures in CI. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * dotnet: add DefaultAzureCredential warning to remaining samples Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * dotnet: address PR review feedback Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
113 lines
4.6 KiB
C#
113 lines
4.6 KiB
C#
// Copyright (c) Microsoft. All rights reserved.
|
|
|
|
// This sample shows how to use the Computer Use tool with AIProjectClient.AsAIAgent(...).
|
|
|
|
using Azure.AI.Projects;
|
|
using Azure.Identity;
|
|
using Demo.ComputerUse;
|
|
using Microsoft.Agents.AI;
|
|
using Microsoft.Agents.AI.Foundry;
|
|
using Microsoft.Extensions.AI;
|
|
using OpenAI.Responses;
|
|
|
|
// Read the Foundry project endpoint. A missing, empty, or whitespace-only value is treated as
// "not set" so the failure is reported here with a clear message instead of surfacing later
// as a UriFormatException when the value is turned into a Uri.
string? endpointSetting = Environment.GetEnvironmentVariable("FOUNDRY_PROJECT_ENDPOINT");
string endpoint = !string.IsNullOrWhiteSpace(endpointSetting)
    ? endpointSetting
    : throw new InvalidOperationException("FOUNDRY_PROJECT_ENDPOINT is not set.");

// Read the model deployment name, falling back to the default when the variable is
// missing or blank (a blank value would otherwise be sent as an empty model name).
string? deploymentSetting = Environment.GetEnvironmentVariable("AZURE_AI_COMPUTER_USE_DEPLOYMENT_NAME");
string deploymentName = string.IsNullOrWhiteSpace(deploymentSetting)
    ? "computer-use-preview"
    : deploymentSetting;
|
|
|
|
// WARNING: DefaultAzureCredential is handy during development, but think carefully before using it in production.
// For production, prefer a specific credential (e.g., ManagedIdentityCredential) so that you avoid
// added latency, unintended credential probing, and the security risks of fallback mechanisms.
Uri projectEndpoint = new(endpoint);
DefaultAzureCredential credential = new();
AIProjectClient projectClient = new(projectEndpoint, credential);

// Hosted file client obtained from the project's OpenAI client. Declared with `using`,
// so it is disposed when the top-level program scope ends.
var projectOpenAIClient = projectClient.GetProjectOpenAIClient();
using IHostedFileClient fileClient = projectOpenAIClient.AsIHostedFileClient();
|
|
|
|
// Computer Use tool targeting a browser environment. The two numeric arguments are presumably
// the display width and height in pixels - TODO confirm against FoundryAITool.CreateComputerTool.
var computerTool = FoundryAITool.CreateComputerTool(ComputerToolEnvironment.Browser, 1026, 769);

// Expose the Foundry project client as an agent that runs on the configured deployment
// and has the computer tool available.
AIAgent agent = projectClient.AsAIAgent(
    model: deploymentName,
    name: "ComputerAgent",
    instructions: "You are a computer automation assistant.",
    tools: [computerTool]);
|
|
|
|
// Uploaded screenshot assets, looked up by name (e.g., "browser_search") to obtain a file id.
// Declared outside the try block so the finally block can clean up whatever was uploaded.
Dictionary<string, string> screenshots = [];

// Upper bound on agent turns so the sample always terminates.
const int MaxTurns = 10;

try
{
    // Upload pre-captured screenshots that simulate browser state transitions.
    screenshots = await ComputerUseUtil.UploadScreenshotAssetsAsync(fileClient);

    // Enable auto-truncation for the Responses API.
    ChatClientAgentRunOptions runOptions = new()
    {
        ChatOptions = new ChatOptions
        {
            RawRepresentationFactory = (_) => new CreateResponseOptions() { TruncationMode = ResponseTruncationMode.Auto },
        }
    };

    // Send the initial request with a screenshot of the browser.
    ChatMessage message = new(ChatRole.User, [
        new TextContent("Search for 'OpenAI news'. Type it and submit. Once you see results, the task is complete."),
        new AIContent() { RawRepresentation = ResponseContentPart.CreateInputImagePart(imageFileId: screenshots["browser_search"], imageDetailLevel: ResponseImageDetailLevel.High) }
    ]);

    Console.WriteLine("Starting computer use session...");

    AgentSession session = await agent.CreateSessionAsync();
    AgentResponse response = await agent.RunAsync(message, session: session, options: runOptions);

    SearchState currentState = SearchState.Initial;

    // Set only when the loop exits through the "search submitted, no more actions" path.
    bool completed = false;

    for (int i = 0; i < MaxTurns; i++)
    {
        // Find the next computer call action (only the first one in the response is handled).
        ComputerCallResponseItem? computerCall = response.Messages
            .SelectMany(m => m.Contents)
            .Select(c => c.RawRepresentation)
            .OfType<ComputerCallResponseItem>()
            .FirstOrDefault();

        if (computerCall is null)
        {
            if (currentState == SearchState.PressedEnter)
            {
                Console.WriteLine("No more computer actions. Done.");
                Console.WriteLine(response);
                completed = true;
                break;
            }

            // Check if the agent is asking for confirmation to proceed, and if so, respond affirmatively.
            TextContent? textContent = response.Messages
                .Where(m => m.Role == ChatRole.Assistant)
                .SelectMany(m => m.Contents.OfType<TextContent>())
                .FirstOrDefault();

            // Heuristic match on common confirmation phrasings; comparisons are ordinal (case-sensitive).
            if (textContent?.Text is { } text && (
                text.Contains("Would you like me", StringComparison.Ordinal) ||
                text.Contains("Should I", StringComparison.Ordinal) ||
                text.Contains("proceed", StringComparison.Ordinal) ||
                text.Contains('?')))
            {
                // Note: this confirmation round-trip also consumes one of the MaxTurns iterations.
                response = await agent.RunAsync("Please proceed.", session, runOptions);
                continue;
            }

            // Neither a computer action nor a question: nothing more this sample can do.
            break;
        }

        Console.WriteLine($"[{i + 1}] Action: {computerCall.Action.Kind}");

        // Simulate the action and get the resulting screenshot.
        (currentState, string fileId) = await ComputerUseUtil.GetScreenshotAsync(computerCall.Action, currentState, screenshots);

        // Send the screenshot back as the computer call output.
        AIContent callOutput = new()
        {
            RawRepresentation = new ComputerCallOutputResponseItem(
                computerCall.CallId,
                output: ComputerCallOutput.CreateScreenshotOutput(screenshotImageFileId: fileId))
        };

        response = await agent.RunAsync([new ChatMessage(ChatRole.User, [callOutput])], session: session, options: runOptions);
    }

    // Surface the last agent response when the session stopped early or ran out of turns,
    // instead of exiting without any indication of what happened.
    if (!completed)
    {
        Console.WriteLine("Computer use session ended without reaching completion. Last agent response:");
        Console.WriteLine(response);
    }
}
finally
{
    // Always attempt to remove the uploaded screenshot files, even when the session failed.
    await ComputerUseUtil.EnsureDeleteScreenshotAssetsAsync(fileClient, screenshots);
}
|