mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
628bb1af48
* Update Foundry Responses as ChatClientAgent * Migrate obsolete AzureAI integration tests to versioned agent pattern Replace obsolete CreateAIAgentAsync/GetAIAgentAsync calls with Agents.CreateAgentVersionAsync() + AsAIAgent(AgentVersion) in all AzureAI integration tests. - Rename AIProjectClient* test files to FoundryVersionedAgent* - Register AIFunction tools in PromptAgentDefinition.Tools for server-side visibility via AsOpenAIResponseTool() - Skip structured output tests (AzureAIProjectChatClient clears ResponseFormat for versioned agents) - Remove all [Obsolete] attributes and #pragma warning disable CS0618 * Merge FoundryMemory package into AzureAI under Memory/ folder Move all FoundryMemory source, unit tests, and integration tests into the Microsoft.Agents.AI.AzureAI package. Change namespace from Microsoft.Agents.AI.FoundryMemory to Microsoft.Agents.AI.AzureAI. - Add [Experimental] to FoundryMemoryProviderOptions and Scope - Rename internal AIProjectClientExtensions to MemoryStoreExtensions - Update AzureAI .csproj with Compliance.Abstractions, Redaction - Remove FoundryMemory from solution and release filter - Update sample to reference AzureAI instead of FoundryMemory - Delete old Microsoft.Agents.AI.FoundryMemory project and tests * Add EnsureMemoryStoreCreatedAsync and memory existence checks to integration tests - Ensure memory store is created before testing memory operations - Add AZURE_AI_EMBEDDING_DEPLOYMENT_NAME config setting - Assert memories exist in store via SearchMemoriesAsync before cleanup - Verify scope isolation with direct memory store queries * Fix and rename AzureAI unit tests for RAPI vs Versioned clarity - Rename AsAIAgentAsync_* to AsAIAgent_* (drop Async from method group) - Add _Rapi_ prefix to non-versioned (Responses API) tests - Add _Versioned_ prefix to versioned agent tests where needed - Fix RAPI tests: assert GetService<AIProjectClient>() is null - Fix Versioned tests: assert IsType<FoundryAgent> and 
GetService<AIProjectClient>() returns the client instance - Fix UserAgent header tests: proper HTTP handler routing - Fix ChatClient_UsesDefaultConversationIdAsync test setup - All 153 unit tests pass with 0 failures * Rename Microsoft.Agents.AI.AzureAI to Microsoft.Agents.AI.Foundry Rename the project, namespace, folder, and all references from Microsoft.Agents.AI.AzureAI to Microsoft.Agents.AI.Foundry. Also rename Workflows.Declarative.AzureAI to .Foundry. - Rename src, unit test, integration test, and workflow folders - Update namespaces in all source and test .cs files - Update ProjectReferences in ~47 sample and test .csproj files - Update solution files (.slnx, .slnf) - Update sample using statements - Update READMEs, SKILL.md, ADRs in docs/ - Disable package validation baseline for renamed packages - Fix UTF-8 BOM encoding on all affected .cs files - AzureAI.Persistent left completely unchanged * Fix format: remove ImplicitUsings, add explicit usings, fix BOM encoding - Remove ImplicitUsings=enable from Foundry csproj to resolve IDE0005 on shared ReplacingRedactor.cs - Add explicit System usings to all source files that relied on them - Sort usings alphabetically per editorconfig rules - Fix UTF-8 BOM on 12 sample Program.cs files - Rename Azure AI Foundry Agents to Microsoft Foundry Agents in docs
147 lines
6.3 KiB
C#
147 lines
6.3 KiB
C#
// Copyright (c) Microsoft. All rights reserved.
|
|
|
|
// This sample shows how to use the Computer Use Tool with a ChatClientAgent.
|
|
|
|
using Azure.AI.Projects;
|
|
using Azure.Identity;
|
|
using Microsoft.Agents.AI;
|
|
using Microsoft.Agents.AI.Foundry;
|
|
using Microsoft.Extensions.AI;
|
|
using OpenAI.Responses;
|
|
|
|
namespace Demo.ComputerUse;
|
|
|
|
/// <summary>
/// Sample entry point: creates an agent equipped with a browser computer tool and drives it through
/// a simulated search session, answering each computer call with a screenshot.
/// </summary>
internal sealed class Program
{
    /// <summary>
    /// Reads the endpoint and model deployment from environment variables, creates the agent,
    /// and runs the computer use interaction.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the AZURE_AI_PROJECT_ENDPOINT environment variable is not set.
    /// </exception>
    private static async Task Main(string[] args)
    {
        const string AgentInstructions = @"
You are a computer automation assistant.

Be direct and efficient. When you reach the search results page, read and describe the actual search result titles and descriptions you can see.
";

        const string AgentName = "ComputerAgent-RAPI";

        string endpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT") ?? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set.");
        string deploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME") ?? "computer-use-preview";

        // WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
        // In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
        // latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
        AIProjectClient aiProjectClient = new(new Uri(endpoint), new DefaultAzureCredential());

        // Create an AIAgent with ComputerUseTool.
        AIAgent agent = aiProjectClient.AsAIAgent(deploymentName,
            instructions: AgentInstructions,
            name: AgentName,
            description: "Computer automation agent with screen interaction capabilities.",
            tools: [
                FoundryAITool.CreateComputerTool(ComputerToolEnvironment.Browser, 1026, 769),
            ]);

        await InvokeComputerUseAgentAsync(agent);
    }

    /// <summary>
    /// Sends the initial request with a screenshot, then loops: waits for each response to complete,
    /// looks for a computer call in it, simulates the action, and returns the resulting screenshot
    /// to the agent. Stops when the response contains no computer call or the iteration cap is reached.
    /// </summary>
    /// <param name="agent">The agent to run the session against.</param>
    private static async Task InvokeComputerUseAgentAsync(AIAgent agent)
    {
        // Load screenshot assets
        Dictionary<string, byte[]> screenshots = ComputerUseUtil.LoadScreenshotAssets();

        ChatOptions chatOptions = new();
        CreateResponseOptions responseCreationOptions = new()
        {
            TruncationMode = ResponseTruncationMode.Auto
        };
        chatOptions.RawRepresentationFactory = (_) => responseCreationOptions;
        ChatClientAgentRunOptions runOptions = new(chatOptions)
        {
            AllowBackgroundResponses = true,
        };

        ChatMessage message = new(ChatRole.User, [
            new TextContent("I need you to help me search for 'OpenAI news'. Please type 'OpenAI news' and submit the search. Once you see search results, the task is complete."),
            new DataContent(new BinaryData(screenshots["browser_search"]), "image/png")
        ]);

        // Initial request with screenshot - start with Bing search page
        Console.WriteLine("Starting computer automation session (initial screenshot: cua_browser_search.png)...");

        // We use PreviousResponseId to chain calls, sending only the new computer_call_output items
        // instead of re-sending the full context.
        AgentSession session = await agent.CreateSessionAsync();
        AgentResponse response = await agent.RunAsync(message, session: session, options: runOptions);

        // Main interaction loop
        const int MaxIterations = 10;
        int iteration = 0;

        // Initialize state machine
        SearchState currentState = SearchState.Initial;

        while (true)
        {
            // Poll until the response is complete.
            while (response.ContinuationToken is { } token)
            {
                // Wait before polling again.
                await Task.Delay(TimeSpan.FromSeconds(2));

                // Continue with the token.
                runOptions.ContinuationToken = token;

                response = await agent.RunAsync(session, runOptions);
            }

            // Clear the continuation token so the next RunAsync call is a fresh request.
            runOptions.ContinuationToken = null;

            Console.WriteLine($"Agent response received (ID: {response.ResponseId})");

            if (iteration >= MaxIterations)
            {
                Console.WriteLine($"\nReached maximum iterations ({MaxIterations}). Stopping.");
                break;
            }

            iteration++;
            Console.WriteLine($"\n--- Iteration {iteration} ---");

            // Find the first computer call in the response. OfType both filters by type and skips
            // null raw representations, so no separate null check or cast is needed.
            ComputerCallResponseItem? firstComputerCall = response.Messages
                .SelectMany(m => m.Contents)
                .Select(c => c.RawRepresentation)
                .OfType<ComputerCallResponseItem>()
                .FirstOrDefault();

            if (firstComputerCall is null)
            {
                Console.WriteLine("No computer call actions found. Ending interaction.");
                Console.WriteLine($"Final Response: {response}");
                break;
            }

            // Process the first computer call response
            ComputerCallAction action = firstComputerCall.Action;
            string currentCallId = firstComputerCall.CallId;

            Console.WriteLine($"Processing computer call (ID: {currentCallId})");

            // Simulate executing the action and taking a screenshot
            (SearchState CurrentState, byte[] ImageBytes) screenInfo = ComputerUseUtil.HandleComputerActionAndTakeScreenshot(action, currentState, screenshots);
            currentState = screenInfo.CurrentState;

            Console.WriteLine("Sending action result back to agent...");

            // Send only the computer_call_output — the session carries PreviousResponseId for context continuity.
            AIContent callOutput = new()
            {
                RawRepresentation = new ComputerCallOutputResponseItem(
                    currentCallId,
                    output: ComputerCallOutput.CreateScreenshotOutput(new BinaryData(screenInfo.ImageBytes), "image/png"))
            };

            response = await agent.RunAsync([new ChatMessage(ChatRole.User, [callOutput])], session: session, options: runOptions);
        }
    }
}
|