// Copyright (c) Microsoft. All rights reserved. // This sample demonstrates writing custom evaluation functions for domain-specific // checks. Custom evaluators run locally — no cloud evaluator service needed. // For LLM-based quality scoring (relevance, coherence), see Evaluation_SimpleEval. using Azure.AI.Projects; using Azure.Identity; using Microsoft.Agents.AI; string endpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT") ?? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set."); string deploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME") ?? "gpt-4o-mini"; // WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production. // In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid // latency issues, unintended credential probing, and potential security risks from fallback mechanisms. AIProjectClient projectClient = new(new Uri(endpoint), new DefaultAzureCredential()); AIAgent agent = projectClient.AsAIAgent( model: deploymentName, instructions: "You are a customer support agent. Help users resolve their issues " + "politely and provide clear, actionable steps.", name: "SupportAgent"); // Custom check: the agent should not refuse to help. EvalCheck noRefusal = FunctionEvaluator.Create("no_refusal", (string response) => !response.Contains("I can't help", StringComparison.OrdinalIgnoreCase) && !response.Contains("I'm unable to", StringComparison.OrdinalIgnoreCase) && !response.Contains("outside my scope", StringComparison.OrdinalIgnoreCase)); // Custom check: response should include actionable guidance (numbered steps or bullet points). EvalCheck hasActionableSteps = FunctionEvaluator.Create("has_actionable_steps", (string response) => response.Contains("1.", StringComparison.Ordinal) || response.Contains("- ", StringComparison.Ordinal) || response.Contains("• ", StringComparison.Ordinal)); // Custom check: response should be substantial but not excessively long. EvalCheck reasonableLength = FunctionEvaluator.Create("reasonable_length", (string response) => response.Length >= 50 && response.Length <= 2000); // Combine all custom checks into a local evaluator. LocalEvaluator evaluator = new(noRefusal, hasActionableSteps, reasonableLength); string[] queries = [ "My order hasn't arrived after two weeks. What should I do?", "I was charged twice for the same item. Can you help?", "How do I return a damaged product?", ]; AgentEvaluationResults results = await agent.EvaluateAsync(queries, evaluator); Console.WriteLine($"Passed: {results.Passed}/{results.Total}"); Console.WriteLine(); for (int i = 0; i < results.Items.Count; i++) { Console.WriteLine($"Query: {queries[i]}"); Console.WriteLine($"Response: {(results.InputItems?[i].Response is { } resp ? resp.Substring(0, Math.Min(50, resp.Length)) : "N/A")}..."); foreach (var metric in results.Items[i].Metrics) { string status = metric.Value.Interpretation?.Failed == true ? "FAIL" : "PASS"; Console.WriteLine($" [{status}] {metric.Key}"); } Console.WriteLine(); }