// Copyright (c) Microsoft. All rights reserved.

using OpenAI.Responses;

namespace Demo.ComputerUse;

/// <summary>
/// Enum for tracking the state of the simulated web search flow.
/// </summary>
internal enum SearchState
{
    Initial,        // Browser search page
    Typed,          // Text entered in search box
    PressedEnter    // Enter key pressed, transitioning to results
}

/// <summary>
/// Helpers that simulate a computer-use session: instead of driving a real browser,
/// each incoming action advances a small state machine and a pre-recorded screenshot
/// matching the resulting state is returned.
/// </summary>
internal static class ComputerUseUtil
{
    /// <summary>
    /// Loads the pre-recorded screenshot images from the <c>Assets</c> folder located
    /// under the application base directory.
    /// </summary>
    /// <returns>
    /// A dictionary mapping each logical screenshot key (<c>browser_search</c>,
    /// <c>search_typed</c>, <c>search_results</c>) to the raw bytes of its PNG file.
    /// No base64 or data-URL conversion is performed here.
    /// </returns>
    /// <exception cref="IOException">
    /// Propagated from <see cref="File.ReadAllBytes(string)"/> when an asset file or the
    /// <c>Assets</c> directory is missing or cannot be read.
    /// </exception>
    internal static Dictionary<string, byte[]> LoadScreenshotAssets()
    {
        string baseDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Assets");

        ReadOnlySpan<(string key, string fileName)> screenshotFiles =
        [
            ("browser_search", "cua_browser_search.png"),
            ("search_typed", "cua_search_typed.png"),
            ("search_results", "cua_search_results.png")
        ];

        Dictionary<string, byte[]> screenshots = [];
        foreach (var (key, fileName) in screenshotFiles)
        {
            string fullPath = Path.GetFullPath(Path.Combine(baseDir, fileName));
            screenshots[key] = File.ReadAllBytes(fullPath);
        }

        return screenshots;
    }

    /// <summary>
    /// Process a computer action and simulate its execution.
    /// </summary>
    /// <param name="action">The computer action requested by the model.</param>
    /// <param name="currentState">The state of the simulated search flow before the action.</param>
    /// <param name="screenshots">
    /// Screenshot bytes keyed by logical name, as produced by <see cref="LoadScreenshotAssets"/>.
    /// </param>
    /// <returns>
    /// The state after the action and the screenshot bytes that correspond to that state.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="action"/> or <paramref name="screenshots"/> is <see langword="null"/>.
    /// </exception>
    /// <exception cref="KeyNotFoundException">
    /// <paramref name="screenshots"/> has no entry for the image key of the resulting state.
    /// </exception>
    internal static (SearchState CurrentState, byte[] ImageBytes) HandleComputerActionAndTakeScreenshot(
        ComputerCallAction action,
        SearchState currentState,
        Dictionary<string, byte[]> screenshots)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException below.
        ArgumentNullException.ThrowIfNull(action);
        ArgumentNullException.ThrowIfNull(screenshots);

        Console.WriteLine($"Simulating the execution of computer action: {action.Kind}");

        SearchState newState = DetermineNextState(action, currentState);
        string imageKey = GetImageKey(newState);

        return (newState, screenshots[imageKey]);
    }

    /// <summary>
    /// Computes the next state of the simulated flow for the given action.
    /// </summary>
    /// <param name="action">The action being simulated.</param>
    /// <param name="currentState">The state before the action.</param>
    /// <returns>
    /// <see cref="SearchState.Typed"/> for a "type" action that carries text;
    /// <see cref="SearchState.PressedEnter"/> for an Enter/Return key press, or for a click
    /// performed while in <see cref="SearchState.Typed"/>; otherwise
    /// <paramref name="currentState"/> unchanged.
    /// </returns>
    private static SearchState DetermineNextState(ComputerCallAction action, SearchState currentState)
    {
        // The action kind is matched by its string form, case-insensitively.
        string actionType = action.Kind.ToString();

        if (actionType.Equals("type", StringComparison.OrdinalIgnoreCase) && action.TypeText is not null)
        {
            return SearchState.Typed;
        }

        if (IsEnterKeyAction(action, actionType))
        {
            Console.WriteLine("  -> Detected ENTER key press");
            return SearchState.PressedEnter;
        }

        // A click only submits the search once text has been typed.
        if (actionType.Equals("click", StringComparison.OrdinalIgnoreCase) && currentState == SearchState.Typed)
        {
            Console.WriteLine("  -> Detected click after typing");
            return SearchState.PressedEnter;
        }

        return currentState;
    }

    /// <summary>
    /// Determines whether the action is a key press whose key codes include "Return" or "Enter".
    /// </summary>
    /// <param name="action">The action being inspected.</param>
    /// <param name="actionType">The string form of the action's kind.</param>
    /// <returns>
    /// <see langword="true"/> when the kind is "key" or "keypress" (case-insensitive) and the
    /// key codes contain "Return" or "Enter" (case-insensitive); otherwise <see langword="false"/>.
    /// </returns>
    private static bool IsEnterKeyAction(ComputerCallAction action, string actionType)
    {
        return (actionType.Equals("key", StringComparison.OrdinalIgnoreCase) ||
                actionType.Equals("keypress", StringComparison.OrdinalIgnoreCase)) &&
               action.KeyPressKeyCodes is not null &&
               (action.KeyPressKeyCodes.Contains("Return", StringComparer.OrdinalIgnoreCase) ||
                action.KeyPressKeyCodes.Contains("Enter", StringComparer.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Maps a flow state to the key of the screenshot that represents it.
    /// </summary>
    /// <param name="state">The state to map.</param>
    /// <returns>
    /// <c>search_results</c> for <see cref="SearchState.PressedEnter"/>, <c>search_typed</c> for
    /// <see cref="SearchState.Typed"/>, and <c>browser_search</c> for any other state.
    /// </returns>
    private static string GetImageKey(SearchState state) => state switch
    {
        SearchState.PressedEnter => "search_results",
        SearchState.Typed => "search_typed",
        _ => "browser_search"
    };
}