mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
.NET: Create a sample to show bounded chat history with overflow into chat history memory (#4136)
* Create a sample to show bounded chat history with overflow into chat history memory * Address PR comments. * Address PR comment and fix bug
This commit is contained in:
committed by
GitHub
Unverified
parent
394e9c1692
commit
c8750cbe92
@@ -103,6 +103,7 @@
|
||||
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step01_ChatHistoryMemory/AgentWithMemory_Step01_ChatHistoryMemory.csproj" />
|
||||
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step02_MemoryUsingMem0/AgentWithMemory_Step02_MemoryUsingMem0.csproj" />
|
||||
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step04_MemoryUsingFoundry/AgentWithMemory_Step04_MemoryUsingFoundry.csproj" />
|
||||
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step05_BoundedChatHistory/AgentWithMemory_Step05_BoundedChatHistory.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/Samples/02-agents/AgentWithOpenAI/">
|
||||
<File Path="samples/02-agents/AgentWithOpenAI/README.md" />
|
||||
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFrameworks>net10.0</TargetFrameworks>
|
||||
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Azure.AI.OpenAI" />
|
||||
<PackageReference Include="Azure.Identity" />
|
||||
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel.Connectors.InMemory" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.OpenAI\Microsoft.Agents.AI.OpenAI.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
+133
@@ -0,0 +1,133 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using Microsoft.Agents.AI;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Extensions.VectorData;
|
||||
|
||||
namespace SampleApp;
|
||||
|
||||
/// <summary>
/// A <see cref="ChatHistoryProvider"/> that keeps a bounded window of recent messages in session state
/// (via <see cref="InMemoryChatHistoryProvider"/>) and overflows older messages to a vector store
/// (via <see cref="ChatHistoryMemoryProvider"/>). When providing chat history, it searches the vector
/// store for relevant older messages and prepends them as a single memory context message.
/// </summary>
/// <remarks>
/// <para>
/// Only non-system messages count towards the session state limit and the overflow mechanism.
/// The <see cref="TruncatingChatReducer"/> used here keeps the first system message in session state
/// and never reports system messages as removed, so they are not written to the vector store.
/// </para>
/// <para>
/// Messages that contain function calls or function results are dropped when truncation happens:
/// they are removed from session state and are not persisted to the vector store either.
/// </para>
/// <para>
/// A single <see cref="TruncatingChatReducer"/> instance is shared by every session served by this
/// provider, and its <see cref="TruncatingChatReducer.RemovedMessages"/> value is overwritten on each
/// reduction. NOTE(review): this looks unsafe if several sessions are stored concurrently through the
/// same provider instance - confirm the intended usage before relying on it outside this sample.
/// </para>
/// </remarks>
internal sealed class BoundedChatHistoryProvider : ChatHistoryProvider, IDisposable
{
    private readonly InMemoryChatHistoryProvider _chatHistoryProvider;
    private readonly ChatHistoryMemoryProvider _memoryProvider;
    private readonly TruncatingChatReducer _reducer;
    private readonly string _contextPrompt;

    // Lazily built concatenation of the two inner providers' state keys (see StateKeys).
    private IReadOnlyList<string>? _stateKeys;

    /// <summary>
    /// Initializes a new instance of the <see cref="BoundedChatHistoryProvider"/> class.
    /// </summary>
    /// <param name="maxSessionMessages">The maximum number of non-system messages to keep in session state before overflowing to the vector store. Must be greater than zero.</param>
    /// <param name="vectorStore">The vector store to use for storing and retrieving overflow chat history.</param>
    /// <param name="collectionName">The name of the collection for storing overflow chat history in the vector store.</param>
    /// <param name="vectorDimensions">The number of dimensions to use for the chat history vector store embeddings.</param>
    /// <param name="stateInitializer">A delegate that initializes the memory provider state, providing the storage and search scopes.</param>
    /// <param name="contextPrompt">Optional prompt to prefix memory search results. Defaults to a standard memory context prompt.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxSessionMessages"/> is zero or negative.</exception>
    public BoundedChatHistoryProvider(
        int maxSessionMessages,
        VectorStore vectorStore,
        string collectionName,
        int vectorDimensions,
        Func<AgentSession?, ChatHistoryMemoryProvider.State> stateInitializer,
        string? contextPrompt = null)
    {
        // TruncatingChatReducer rejects zero as well as negative limits, so validate the same range
        // here and report the failure against this constructor's own parameter name.
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maxSessionMessages);

        this._reducer = new TruncatingChatReducer(maxSessionMessages);

        // The reducer runs after messages are added, so truncation (and the list of removed
        // messages) is produced as part of storing a completed exchange.
        // The identity filters pass every message through unchanged; presumably this overrides the
        // providers' default filtering - confirm against the provider options documentation.
        this._chatHistoryProvider = new InMemoryChatHistoryProvider(new InMemoryChatHistoryProviderOptions
        {
            ChatReducer = this._reducer,
            ReducerTriggerEvent = InMemoryChatHistoryProviderOptions.ChatReducerTriggerEvent.AfterMessageAdded,
            StorageInputRequestMessageFilter = msgs => msgs,
        });

        this._memoryProvider = new ChatHistoryMemoryProvider(
            vectorStore,
            collectionName,
            vectorDimensions,
            stateInitializer,
            options: new ChatHistoryMemoryProviderOptions
            {
                SearchInputMessageFilter = msgs => msgs,
                StorageInputRequestMessageFilter = msgs => msgs,
            });

        this._contextPrompt = contextPrompt
            ?? "The following are memories from earlier in this conversation. Use them to inform your responses:";
    }

    /// <summary>
    /// Gets the state keys of both inner providers: the in-memory history provider's keys followed by
    /// the memory provider's keys. The combined list is computed on first access and then cached.
    /// </summary>
    public override IReadOnlyList<string> StateKeys =>
        this._stateKeys ??= this._chatHistoryProvider.StateKeys.Concat(this._memoryProvider.StateKeys).ToArray();

    /// <summary>
    /// Returns the recent session history, preceded by one memory context message when the vector
    /// store search yields any non-blank text for the current request messages.
    /// </summary>
    /// <param name="context">The invoking context carrying the agent, session and request messages.</param>
    /// <param name="cancellationToken">A token to cancel the operation.</param>
    /// <returns>The messages to use as chat history for this invocation.</returns>
    protected override async ValueTask<IEnumerable<ChatMessage>> ProvideChatHistoryAsync(
        InvokingContext context,
        CancellationToken cancellationToken = default)
    {
        // Delegate to the inner provider's full lifecycle (retrieve, filter, stamp, merge with request messages).
        // No request messages are passed, so only the stored history comes back.
        var historyContext = new InvokingContext(context.Agent, context.Session, []);
        var sessionHistory = await this._chatHistoryProvider.InvokingAsync(historyContext, cancellationToken).ConfigureAwait(false);

        // Search the vector store for older messages relevant to the current request.
        var searchInput = new AIContext { Messages = context.RequestMessages.ToList() };
        var searchContext = new AIContextProvider.InvokingContext(context.Agent, context.Session, searchInput);
        var searchResult = await this._memoryProvider.InvokingAsync(searchContext, cancellationToken).ConfigureAwait(false);

        // Keep only the messages added by the memory provider (stamped with the AIContextProvider source type);
        // the request messages that were passed in as search input are excluded by this filter.
        var memoryMessages = searchResult.Messages?
            .Where(m => m.GetAgentRequestMessageSourceType() == AgentRequestMessageSourceType.AIContextProvider)
            .ToList();

        if (memoryMessages is not { Count: > 0 })
        {
            return sessionHistory;
        }

        var memoryText = string.Join("\n", memoryMessages.Select(m => m.Text).Where(t => !string.IsNullOrWhiteSpace(t)));
        if (string.IsNullOrWhiteSpace(memoryText))
        {
            return sessionHistory;
        }

        // Collapse all recalled memories into one user message placed ahead of the session history.
        var contextMessage = new ChatMessage(ChatRole.User, $"{this._contextPrompt}\n{memoryText}");
        return new[] { contextMessage }.Concat(sessionHistory);
    }

    /// <summary>
    /// Stores the completed exchange in session state and archives any messages that the reducer
    /// pushed out of the bounded window to the vector store.
    /// </summary>
    /// <param name="context">The invoked context carrying the request and response messages.</param>
    /// <param name="cancellationToken">A token to cancel the operation.</param>
    protected override async ValueTask StoreChatHistoryAsync(
        InvokedContext context,
        CancellationToken cancellationToken = default)
    {
        // Delegate storage to the in-memory provider. Its TruncatingChatReducer (AfterMessageAdded trigger)
        // will automatically truncate to the configured maximum and expose any removed messages.
        var storeContext = new InvokedContext(
            context.Agent, context.Session, context.RequestMessages, context.ResponseMessages!);
        await this._chatHistoryProvider.InvokedAsync(storeContext, cancellationToken).ConfigureAwait(false);

        // Read the reducer's result once so the emptiness check and the archive call below see the
        // same list even if the shared reducer runs again in the meantime.
        if (this._reducer.RemovedMessages is { Count: > 0 } removedMessages)
        {
            // The removed messages are presented to the memory provider as request messages with an
            // empty response, which is how they get written to the vector store.
            var overflowContext = new AIContextProvider.InvokedContext(
                context.Agent, context.Session, removedMessages, []);
            await this._memoryProvider.InvokedAsync(overflowContext, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Disposes the inner <see cref="ChatHistoryMemoryProvider"/>.
    /// </summary>
    public void Dispose()
    {
        this._memoryProvider.Dispose();
    }
}
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
// This sample shows how to create a bounded chat history provider that keeps a configurable number of
|
||||
// recent messages in session state and automatically overflows older messages to a vector store.
|
||||
// When the agent is invoked, it searches the vector store for relevant older messages and
|
||||
// prepends them as a "memory" context message before the recent session history.
|
||||
|
||||
using Azure.AI.OpenAI;
|
||||
using Azure.Identity;
|
||||
using Microsoft.Agents.AI;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Extensions.VectorData;
|
||||
using Microsoft.SemanticKernel.Connectors.InMemory;
|
||||
using OpenAI.Chat;
|
||||
using SampleApp;
|
||||
|
||||
var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
var deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
var embeddingDeploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME") ?? "text-embedding-3-large";

// Number of non-system messages kept in session state before the oldest ones overflow to the vector store.
const int MaxSessionMessages = 4;

// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
// In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
// latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
var credential = new DefaultAzureCredential();

// One client serves both the embedding generator and the chat agent.
var azureClient = new AzureOpenAIClient(new Uri(endpoint), credential);

// Create a vector store to store overflow chat messages.
// For demonstration purposes, we are using an in-memory vector store.
// Replace this with a persistent vector store implementation for production scenarios.
VectorStore vectorStore = new InMemoryVectorStore(new InMemoryVectorStoreOptions()
{
    EmbeddingGenerator = azureClient
        .GetEmbeddingClient(embeddingDeploymentName)
        .AsIEmbeddingGenerator()
});

var sessionId = Guid.NewGuid().ToString();

// Create the BoundedChatHistoryProvider with a bounded number of non-system messages in session state.
// It internally creates an InMemoryChatHistoryProvider with a TruncatingChatReducer and a
// ChatHistoryMemoryProvider with the correct configuration to ensure overflow messages are
// automatically archived to the vector store and recalled via semantic search.
// The provider is IDisposable, so it is disposed when the program ends.
// NOTE: 3072 is the default embedding size of text-embedding-3-large; change vectorDimensions if
// AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME points at a model with a different output size.
using var boundedProvider = new BoundedChatHistoryProvider(
    maxSessionMessages: MaxSessionMessages,
    vectorStore,
    collectionName: "chathistory-overflow",
    vectorDimensions: 3072,
    // The session argument is not needed here: overflow is stored under this run's session id
    // and searched across everything stored for the user.
    stateInitializer: _ => new ChatHistoryMemoryProvider.State(
        storageScope: new() { UserId = "UID1", SessionId = sessionId },
        searchScope: new() { UserId = "UID1" }));

// Create the agent with the bounded chat history provider.
AIAgent agent = azureClient
    .GetChatClient(deploymentName)
    .AsAIAgent(new ChatClientAgentOptions
    {
        ChatOptions = new() { Instructions = "You are a helpful assistant. Answer questions concisely." },
        Name = "Assistant",
        ChatHistoryProvider = boundedProvider,
    });

// Start a conversation. The first several exchanges will fill up the session state window.
AgentSession session = await agent.CreateSessionAsync();

Console.WriteLine($"--- Filling the session window ({MaxSessionMessages} messages max) ---\n");

Console.WriteLine(await agent.RunAsync("My favorite color is blue.", session));
Console.WriteLine(await agent.RunAsync("I have a dog named Max.", session));

// At this point the session state holds 4 messages (2 user + 2 assistant).
// The next exchange will push the oldest messages into the vector store.
Console.WriteLine("\n--- Next exchange will trigger overflow to vector store ---\n");

Console.WriteLine(await agent.RunAsync("What is the capital of France?", session));

// The oldest messages about favorite color have now been archived to the vector store.
// Ask the agent something that requires recalling the overflowed information.
Console.WriteLine("\n--- Asking about overflowed information (should recall from vector store) ---\n");

Console.WriteLine(await agent.RunAsync("What is my favorite color?", session));
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# Bounded Chat History with Vector Store Overflow
|
||||
|
||||
This sample demonstrates how to create a custom `ChatHistoryProvider` that keeps a bounded window of recent messages in session state and automatically overflows older messages to a vector store. When the agent is invoked, it searches the vector store for relevant older messages and prepends them as memory context.
|
||||
|
||||
## Concepts
|
||||
|
||||
- **`TruncatingChatReducer`**: A custom `IChatReducer` that keeps the first system message plus the most recent N non-system, non-function messages, and exposes the messages pushed out of that window via a `RemovedMessages` property.
|
||||
- **`BoundedChatHistoryProvider`**: A custom `ChatHistoryProvider` that composes:
|
||||
  - `InMemoryChatHistoryProvider` for fast session-state storage (bounded by the reducer)
  - `ChatHistoryMemoryProvider` for vector-store overflow and semantic search of older messages
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [.NET 10 SDK](https://dotnet.microsoft.com/download/dotnet/10.0)
|
||||
- An Azure OpenAI resource with:
|
||||
  - A chat deployment (e.g., `gpt-4o-mini`)
  - An embedding deployment (e.g., `text-embedding-3-large`)
|
||||
|
||||
## Configuration
|
||||
|
||||
Set the following environment variables:
|
||||
|
||||
| Variable | Description | Default |
|
||||
|---|---|---|
|
||||
| `AZURE_OPENAI_ENDPOINT` | Your Azure OpenAI endpoint URL | *(required)* |
|
||||
| `AZURE_OPENAI_DEPLOYMENT_NAME` | Chat model deployment name | `gpt-4o-mini` |
|
||||
| `AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME` | Embedding model deployment name | `text-embedding-3-large` |
|
||||
|
||||
## Running the Sample
|
||||
|
||||
```bash
|
||||
dotnet run
|
||||
```
|
||||
|
||||
## How it Works
|
||||
|
||||
1. The agent starts a conversation with a bounded session window of 4 non-system, non-function messages (i.e., user/assistant turns). The first system message is always preserved; function call/result messages are dropped during truncation and are not archived.
|
||||
2. As messages accumulate beyond the limit, the `TruncatingChatReducer` removes the oldest messages.
|
||||
3. The `BoundedChatHistoryProvider` detects the removed messages and stores them in a vector store via `ChatHistoryMemoryProvider`.
|
||||
4. On subsequent invocations, the provider searches the vector store for relevant older messages and prepends them as memory context, allowing the agent to recall information from earlier in the conversation.
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
using Microsoft.Extensions.AI;
|
||||
|
||||
namespace SampleApp;
|
||||
|
||||
/// <summary>
/// A truncating chat reducer that keeps the most recent messages up to a configured maximum,
/// preserving the first system message. Removed messages are exposed via <see cref="RemovedMessages"/>
/// so that a caller can archive them (e.g. to a vector store).
/// </summary>
/// <remarks>
/// <para>
/// Only the first system message encountered is kept, and it is always returned ahead of the retained
/// window. Any further system messages, and any message containing a <see cref="FunctionCallContent"/>
/// or <see cref="FunctionResultContent"/>, are dropped without being reported in
/// <see cref="RemovedMessages"/>.
/// </para>
/// <para>
/// <see cref="RemovedMessages"/> is overwritten by every call to <see cref="ReduceAsync"/>, so read it
/// immediately after the call whose result you want; concurrent calls on one instance will overwrite
/// each other's value.
/// </para>
/// </remarks>
internal sealed class TruncatingChatReducer : IChatReducer
{
    private readonly int _maxMessages;

    /// <summary>
    /// Initializes a new instance of the <see cref="TruncatingChatReducer"/> class.
    /// </summary>
    /// <param name="maxMessages">The maximum number of non-system messages to retain. Must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxMessages"/> is zero or negative.</exception>
    public TruncatingChatReducer(int maxMessages)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maxMessages);
        this._maxMessages = maxMessages;
    }

    /// <summary>
    /// Gets the messages that were removed during the most recent call to <see cref="ReduceAsync"/>,
    /// oldest first. Empty until the first call, and whenever the last call removed nothing.
    /// </summary>
    public IReadOnlyList<ChatMessage> RemovedMessages { get; private set; } = [];

    /// <summary>
    /// Reduces <paramref name="messages"/> to the first system message (if any) followed by the most
    /// recent retained messages, and records the messages pushed out of the window in
    /// <see cref="RemovedMessages"/>.
    /// </summary>
    /// <param name="messages">The full message list to reduce.</param>
    /// <param name="cancellationToken">Not observed; the reduction completes synchronously.</param>
    /// <returns>A completed task holding the reduced message sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="messages"/> is <see langword="null"/>.</exception>
    public Task<IEnumerable<ChatMessage>> ReduceAsync(IEnumerable<ChatMessage> messages, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(messages);

        ChatMessage? systemMessage = null;

        // Let the queue grow on demand: pre-sizing it to the configured maximum would allocate the
        // whole limit up front even when the history is far shorter than a very large limit.
        Queue<ChatMessage> retained = new();
        List<ChatMessage> removed = [];

        foreach (var message in messages)
        {
            if (message.Role == ChatRole.System)
            {
                // Preserve the first system message outside the counting window.
                systemMessage ??= message;
                continue;
            }

            // Function call/result messages are neither retained nor reported as removed.
            if (message.Contents.Any(c => c is FunctionCallContent or FunctionResultContent))
            {
                continue;
            }

            // Window is full: the oldest retained message makes room for the new one.
            if (retained.Count >= this._maxMessages)
            {
                removed.Add(retained.Dequeue());
            }

            retained.Enqueue(message);
        }

        this.RemovedMessages = removed;

        IEnumerable<ChatMessage> result = systemMessage is not null
            ? new[] { systemMessage }.Concat(retained)
            : retained;

        return Task.FromResult(result);
    }
}
|
||||
@@ -8,5 +8,6 @@ These samples show how to create an agent with the Agent Framework that uses Mem
|
||||
|[Memory with MemoryStore](./AgentWithMemory_Step02_MemoryUsingMem0/)|This sample demonstrates how to create and run an agent that uses the Mem0 service to extract and retrieve individual memories.|
|
||||
|[Custom Memory Implementation](../../01-get-started/04_memory/)|This sample demonstrates how to create a custom memory component and attach it to an agent.|
|
||||
|[Memory with Azure AI Foundry](./AgentWithMemory_Step04_MemoryUsingFoundry/)|This sample demonstrates how to create and run an agent that uses Azure AI Foundry's managed memory service to extract and retrieve individual memories.|
|
||||
|[Bounded Chat History with Overflow](./AgentWithMemory_Step05_BoundedChatHistory/)|This sample demonstrates how to create a bounded chat history provider that overflows older messages to a vector store and recalls them as memories.|
|
||||
|
||||
> **See also**: [Memory Search with Foundry Agents](../FoundryAgents/FoundryAgents_Step22_MemorySearch/) - demonstrates using the built-in Memory Search tool with Azure Foundry Agents.
|
||||
|
||||
Reference in New Issue
Block a user