.NET: Create a sample to show bounded chat history with overflow into chat history memory (#4136)

* Create a sample to show bounded chat history with overflow into chat history memory

* Address PR comments.

* Address PR comment and fix bug
This commit is contained in:
westey
2026-03-06 18:03:43 +00:00
committed by GitHub
Unverified
parent 394e9c1692
commit c8750cbe92
7 changed files with 341 additions and 0 deletions
+1
View File
@@ -103,6 +103,7 @@
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step01_ChatHistoryMemory/AgentWithMemory_Step01_ChatHistoryMemory.csproj" />
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step02_MemoryUsingMem0/AgentWithMemory_Step02_MemoryUsingMem0.csproj" />
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step04_MemoryUsingFoundry/AgentWithMemory_Step04_MemoryUsingFoundry.csproj" />
<Project Path="samples/02-agents/AgentWithMemory/AgentWithMemory_Step05_BoundedChatHistory/AgentWithMemory_Step05_BoundedChatHistory.csproj" />
</Folder>
<Folder Name="/Samples/02-agents/AgentWithOpenAI/">
<File Path="samples/02-agents/AgentWithOpenAI/README.md" />
@@ -0,0 +1,22 @@
<!-- Console sample project for the bounded-chat-history-with-overflow sample. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Built as a runnable executable; nullable reference types and implicit usings are enabled. -->
<OutputType>Exe</OutputType>
<TargetFrameworks>net10.0</TargetFrameworks>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<!-- No versions are specified here; presumably they are managed centrally for the repository (confirm). -->
<PackageReference Include="Azure.AI.OpenAI" />
<PackageReference Include="Azure.Identity" />
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" />
<PackageReference Include="Microsoft.SemanticKernel.Connectors.InMemory" />
</ItemGroup>
<ItemGroup>
<!-- The sample builds against the in-repo agent library rather than a published package. -->
<ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.OpenAI\Microsoft.Agents.AI.OpenAI.csproj" />
</ItemGroup>
</Project>
@@ -0,0 +1,133 @@
// Copyright (c) Microsoft. All rights reserved.
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.VectorData;
namespace SampleApp;
/// <summary>
/// A <see cref="ChatHistoryProvider"/> that keeps a bounded window of recent messages in session state
/// (via <see cref="InMemoryChatHistoryProvider"/>) and overflows older messages to a vector store
/// (via <see cref="ChatHistoryMemoryProvider"/>). When providing chat history, it searches the vector
/// store for relevant older messages and prepends them as a single memory context message.
/// </summary>
/// <remarks>
/// <para>
/// Only non-system messages count towards the session state limit and the overflow mechanism. The
/// <see cref="TruncatingChatReducer"/> keeps the first system message in session state, and system
/// messages are never written to the vector store.
/// </para>
/// <para>
/// Messages that contain function calls or function results are dropped from session state whenever the
/// reducer runs, and they are not persisted to the vector store either.
/// </para>
/// </remarks>
internal sealed class BoundedChatHistoryProvider : ChatHistoryProvider, IDisposable
{
    private readonly InMemoryChatHistoryProvider _chatHistoryProvider;
    private readonly ChatHistoryMemoryProvider _memoryProvider;
    private readonly TruncatingChatReducer _reducer;
    private readonly string _contextPrompt;
    private IReadOnlyList<string>? _stateKeys;

    /// <summary>
    /// Initializes a new instance of the <see cref="BoundedChatHistoryProvider"/> class.
    /// </summary>
    /// <param name="maxSessionMessages">The maximum number of non-system messages to keep in session state before overflowing to the vector store. Must be greater than zero.</param>
    /// <param name="vectorStore">The vector store to use for storing and retrieving overflow chat history.</param>
    /// <param name="collectionName">The name of the collection for storing overflow chat history in the vector store.</param>
    /// <param name="vectorDimensions">The number of dimensions to use for the chat history vector store embeddings.</param>
    /// <param name="stateInitializer">A delegate that initializes the memory provider state, providing the storage and search scopes.</param>
    /// <param name="contextPrompt">Optional prompt to prefix memory search results. Defaults to a standard memory context prompt.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxSessionMessages"/> is zero or negative.</exception>
    public BoundedChatHistoryProvider(
        int maxSessionMessages,
        VectorStore vectorStore,
        string collectionName,
        int vectorDimensions,
        Func<AgentSession?, ChatHistoryMemoryProvider.State> stateInitializer,
        string? contextPrompt = null)
    {
        // TruncatingChatReducer rejects zero as well as negative limits. Validate here so the failure is
        // reported against this constructor's own parameter instead of the reducer's.
        if (maxSessionMessages <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(maxSessionMessages),
                maxSessionMessages,
                "maxSessionMessages must be greater than zero.");
        }

        this._reducer = new TruncatingChatReducer(maxSessionMessages);

        // The reducer runs after every add, so session state never holds more than the configured window.
        // The identity filters pass request messages through unchanged to the inner providers.
        this._chatHistoryProvider = new InMemoryChatHistoryProvider(new InMemoryChatHistoryProviderOptions
        {
            ChatReducer = this._reducer,
            ReducerTriggerEvent = InMemoryChatHistoryProviderOptions.ChatReducerTriggerEvent.AfterMessageAdded,
            StorageInputRequestMessageFilter = msgs => msgs,
        });

        this._memoryProvider = new ChatHistoryMemoryProvider(
            vectorStore,
            collectionName,
            vectorDimensions,
            stateInitializer,
            options: new ChatHistoryMemoryProviderOptions
            {
                SearchInputMessageFilter = msgs => msgs,
                StorageInputRequestMessageFilter = msgs => msgs,
            });

        this._contextPrompt = contextPrompt
            ?? "The following are memories from earlier in this conversation. Use them to inform your responses:";
    }

    /// <summary>
    /// Gets the state keys of both inner providers (session history first, then memory).
    /// The combined list is built on first access and cached.
    /// </summary>
    public override IReadOnlyList<string> StateKeys =>
        this._stateKeys ??= this._chatHistoryProvider.StateKeys.Concat(this._memoryProvider.StateKeys).ToArray();

    /// <summary>
    /// Returns the bounded session history, preceded by a single memory context message when the vector
    /// store search yields any non-blank text for the current request.
    /// </summary>
    /// <param name="context">The invocation context carrying the agent, session and request messages.</param>
    /// <param name="cancellationToken">Token used to cancel the inner provider calls.</param>
    /// <returns>The messages to use as chat history for this invocation.</returns>
    protected override async ValueTask<IEnumerable<ChatMessage>> ProvideChatHistoryAsync(
        InvokingContext context,
        CancellationToken cancellationToken = default)
    {
        // Run the inner provider through its public entry point, but with an empty request list so that
        // the current request messages are not handed to it at this stage.
        var chatHistoryProviderInputContext = new InvokingContext(context.Agent, context.Session, []);
        var allMessages = await this._chatHistoryProvider.InvokingAsync(chatHistoryProviderInputContext, cancellationToken).ConfigureAwait(false);

        // Search the vector store for relevant older messages, using the current request as the query input.
        var aiContext = new AIContext { Messages = context.RequestMessages.ToList() };
        var invokingContext = new AIContextProvider.InvokingContext(
            context.Agent, context.Session, aiContext);
        var result = await this._memoryProvider.InvokingAsync(invokingContext, cancellationToken).ConfigureAwait(false);

        // Keep only the messages added by the memory provider (stamped with the AIContextProvider source
        // type); anything else in the result is the request input echoed back.
        var memoryMessages = result.Messages?
            .Where(m => m.GetAgentRequestMessageSourceType() == AgentRequestMessageSourceType.AIContextProvider)
            .ToList();

        if (memoryMessages is not { Count: > 0 })
        {
            return allMessages;
        }

        var memoryText = string.Join("\n", memoryMessages.Select(m => m.Text).Where(t => !string.IsNullOrWhiteSpace(t)));
        if (string.IsNullOrWhiteSpace(memoryText))
        {
            return allMessages;
        }

        // Collapse all recalled memories into one user message placed ahead of the recent history.
        var contextMessage = new ChatMessage(ChatRole.User, $"{this._contextPrompt}\n{memoryText}");
        return new[] { contextMessage }.Concat(allMessages);
    }

    /// <summary>
    /// Stores the request and response messages in session state, then archives to the vector store any
    /// messages the reducer removed while truncating.
    /// </summary>
    /// <param name="context">The invocation context carrying the agent, session, request and response messages.</param>
    /// <param name="cancellationToken">Token used to cancel the inner provider calls.</param>
    protected override async ValueTask StoreChatHistoryAsync(
        InvokedContext context,
        CancellationToken cancellationToken = default)
    {
        // Delegate storage to the in-memory provider. Its TruncatingChatReducer (AfterMessageAdded trigger)
        // truncates to the configured maximum and exposes any removed messages.
        var innerContext = new InvokedContext(
            context.Agent, context.Session, context.RequestMessages, context.ResponseMessages!);
        await this._chatHistoryProvider.InvokedAsync(innerContext, cancellationToken).ConfigureAwait(false);

        // Snapshot the reducer output once so the check and the archive call see the same list.
        // NOTE(review): the reducer instance is shared by every session using this provider, so
        // concurrent invocations could observe each other's removed messages - confirm usage is sequential.
        var removedMessages = this._reducer.RemovedMessages;
        if (removedMessages is { Count: > 0 })
        {
            var overflowContext = new AIContextProvider.InvokedContext(
                context.Agent, context.Session, removedMessages, []);
            await this._memoryProvider.InvokedAsync(overflowContext, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Disposes the inner <see cref="ChatHistoryMemoryProvider"/>.
    /// </summary>
    public void Dispose()
    {
        this._memoryProvider.Dispose();
    }
}
@@ -0,0 +1,79 @@
// Copyright (c) Microsoft. All rights reserved.
// This sample shows how to create a bounded chat history provider that keeps a configurable number of
// recent messages in session state and automatically overflows older messages to a vector store.
// When the agent is invoked, it searches the vector store for relevant older messages and
// prepends them as a "memory" context message before the recent session history.
using Azure.AI.OpenAI;
using Azure.Identity;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.InMemory;
using OpenAI.Chat;
using SampleApp;
var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
var deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
var embeddingDeploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME") ?? "text-embedding-3-large";

// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
// In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
// latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
var credential = new DefaultAzureCredential();

// One client is shared by the embedding generator and the chat agent; both target the same endpoint.
var azureOpenAIClient = new AzureOpenAIClient(new Uri(endpoint), credential);

// Create a vector store to store overflow chat messages.
// For demonstration purposes, we are using an in-memory vector store.
// Replace this with a persistent vector store implementation for production scenarios.
VectorStore vectorStore = new InMemoryVectorStore(new InMemoryVectorStoreOptions()
{
    EmbeddingGenerator = azureOpenAIClient
        .GetEmbeddingClient(embeddingDeploymentName)
        .AsIEmbeddingGenerator()
});

var sessionId = Guid.NewGuid().ToString();

// Create the BoundedChatHistoryProvider with a maximum of 4 non-system messages in session state.
// It internally creates an InMemoryChatHistoryProvider with a TruncatingChatReducer and a
// ChatHistoryMemoryProvider with the correct configuration to ensure overflow messages are
// automatically archived to the vector store and recalled via semantic search.
// The provider is IDisposable, so it is declared with `using` and released when the program ends.
// Overflow is stored per session (user + session id) but searched across the whole user scope.
using var boundedProvider = new BoundedChatHistoryProvider(
    maxSessionMessages: 4,
    vectorStore,
    collectionName: "chathistory-overflow",
    vectorDimensions: 3072,
    _ => new ChatHistoryMemoryProvider.State(
        storageScope: new() { UserId = "UID1", SessionId = sessionId },
        searchScope: new() { UserId = "UID1" }));

// Create the agent with the bounded chat history provider.
AIAgent agent = azureOpenAIClient
    .GetChatClient(deploymentName)
    .AsAIAgent(new ChatClientAgentOptions
    {
        ChatOptions = new() { Instructions = "You are a helpful assistant. Answer questions concisely." },
        Name = "Assistant",
        ChatHistoryProvider = boundedProvider,
    });

// Start a conversation. The first several exchanges will fill up the session state window.
AgentSession session = await agent.CreateSessionAsync();

Console.WriteLine("--- Filling the session window (4 messages max) ---\n");
Console.WriteLine(await agent.RunAsync("My favorite color is blue.", session));
Console.WriteLine(await agent.RunAsync("I have a dog named Max.", session));

// At this point the session state holds 4 messages (2 user + 2 assistant).
// The next exchange will push the oldest messages into the vector store.
Console.WriteLine("\n--- Next exchange will trigger overflow to vector store ---\n");
Console.WriteLine(await agent.RunAsync("What is the capital of France?", session));

// The oldest messages about favorite color have now been archived to the vector store.
// Ask the agent something that requires recalling the overflowed information.
Console.WriteLine("\n--- Asking about overflowed information (should recall from vector store) ---\n");
Console.WriteLine(await agent.RunAsync("What is my favorite color?", session));
@@ -0,0 +1,40 @@
# Bounded Chat History with Vector Store Overflow
This sample demonstrates how to create a custom `ChatHistoryProvider` that keeps a bounded window of recent messages in session state and automatically overflows older messages to a vector store. When the agent is invoked, it searches the vector store for relevant older messages and prepends them as memory context.
## Concepts
- **`TruncatingChatReducer`**: A custom `IChatReducer` that keeps the most recent N messages and exposes removed messages via a `RemovedMessages` property.
- **`BoundedChatHistoryProvider`**: A custom `ChatHistoryProvider` that composes:
- `InMemoryChatHistoryProvider` for fast session-state storage (bounded by the reducer)
- `ChatHistoryMemoryProvider` for vector-store overflow and semantic search of older messages
## Prerequisites
- [.NET 10 SDK](https://dotnet.microsoft.com/download/dotnet/10.0)
- An Azure OpenAI resource with:
- A chat deployment (e.g., `gpt-4o-mini`)
- An embedding deployment (e.g., `text-embedding-3-large`)
## Configuration
Set the following environment variables:
| Variable | Description | Default |
|---|---|---|
| `AZURE_OPENAI_ENDPOINT` | Your Azure OpenAI endpoint URL | *(required)* |
| `AZURE_OPENAI_DEPLOYMENT_NAME` | Chat model deployment name | `gpt-4o-mini` |
| `AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME` | Embedding model deployment name | `text-embedding-3-large` |
## Running the Sample
```bash
dotnet run
```
## How it Works
1. The agent starts a conversation with a bounded session window of 4 non-system, non-function messages (i.e., user/assistant turns). The first system message is always preserved, while messages containing function calls or function results are dropped from session state and are not archived to the vector store.
2. As messages accumulate beyond the limit, the `TruncatingChatReducer` removes the oldest messages.
3. The `BoundedChatHistoryProvider` detects the removed messages and stores them in a vector store via `ChatHistoryMemoryProvider`.
4. On subsequent invocations, the provider searches the vector store for relevant older messages and prepends them as memory context, allowing the agent to recall information from earlier in the conversation.
@@ -0,0 +1,65 @@
// Copyright (c) Microsoft. All rights reserved.
using Microsoft.Extensions.AI;
namespace SampleApp;
/// <summary>
/// A truncating chat reducer that keeps the most recent messages up to a configured maximum,
/// preserving the first system message it encounters. Removed messages are exposed via
/// <see cref="RemovedMessages"/> so that a caller can archive them (e.g. to a vector store).
/// </summary>
/// <remarks>
/// Messages that contain a <see cref="FunctionCallContent"/> or <see cref="FunctionResultContent"/> are
/// discarded: they are neither retained nor reported in <see cref="RemovedMessages"/>. System messages
/// after the first one are discarded in the same way.
/// </remarks>
internal sealed class TruncatingChatReducer : IChatReducer
{
    private readonly int _maxMessages;

    /// <summary>
    /// Initializes a new instance of the <see cref="TruncatingChatReducer"/> class.
    /// </summary>
    /// <param name="maxMessages">The maximum number of non-system messages to retain. Must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxMessages"/> is zero or negative.</exception>
    public TruncatingChatReducer(int maxMessages)
    {
        this._maxMessages = maxMessages > 0 ? maxMessages : throw new ArgumentOutOfRangeException(nameof(maxMessages));
    }

    /// <summary>
    /// Gets the messages that were removed during the most recent call to <see cref="ReduceAsync"/>,
    /// oldest first. Each call replaces the previous value.
    /// </summary>
    public IReadOnlyList<ChatMessage> RemovedMessages { get; private set; } = [];

    /// <summary>
    /// Reduces <paramref name="messages"/> to the first system message (if any) followed by the newest
    /// countable messages, and records the older countable messages in <see cref="RemovedMessages"/>.
    /// </summary>
    /// <param name="messages">The full message list to reduce.</param>
    /// <param name="cancellationToken">Not observed; the reduction completes synchronously.</param>
    /// <returns>A completed task holding the reduced message sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="messages"/> is <see langword="null"/>.</exception>
    public Task<IEnumerable<ChatMessage>> ReduceAsync(IEnumerable<ChatMessage> messages, CancellationToken cancellationToken)
    {
        _ = messages ?? throw new ArgumentNullException(nameof(messages));

        ChatMessage? systemMessage = null;

        // Collect the countable messages in a list that grows with the actual input. Pre-sizing a buffer
        // to the configured maximum would allocate the whole window on every call, even for short
        // histories, and would fail for very large limits.
        List<ChatMessage> countable = [];

        foreach (var message in messages)
        {
            if (message.Role == ChatRole.System)
            {
                // Preserve the first system message outside the counting window.
                systemMessage ??= message;
            }
            else if (!message.Contents.Any(c => c is FunctionCallContent or FunctionResultContent))
            {
                countable.Add(message);
            }
        }

        // Everything before the last _maxMessages countable messages overflows, in original order.
        int overflowCount = Math.Max(0, countable.Count - this._maxMessages);
        List<ChatMessage> retained = countable.GetRange(overflowCount, countable.Count - overflowCount);
        this.RemovedMessages = countable.GetRange(0, overflowCount);

        IEnumerable<ChatMessage> result = systemMessage is not null
            ? new[] { systemMessage }.Concat(retained)
            : retained;

        return Task.FromResult(result);
    }
}
@@ -8,5 +8,6 @@ These samples show how to create an agent with the Agent Framework that uses Mem
|[Memory with MemoryStore](./AgentWithMemory_Step02_MemoryUsingMem0/)|This sample demonstrates how to create and run an agent that uses the Mem0 service to extract and retrieve individual memories.|
|[Custom Memory Implementation](../../01-get-started/04_memory/)|This sample demonstrates how to create a custom memory component and attach it to an agent.|
|[Memory with Azure AI Foundry](./AgentWithMemory_Step04_MemoryUsingFoundry/)|This sample demonstrates how to create and run an agent that uses Azure AI Foundry's managed memory service to extract and retrieve individual memories.|
|[Bounded Chat History with Overflow](./AgentWithMemory_Step05_BoundedChatHistory/)|This sample demonstrates how to create a bounded chat history provider that overflows older messages to a vector store and recalls them as memories.|
> **See also**: [Memory Search with Foundry Agents](../FoundryAgents/FoundryAgents_Step22_MemorySearch/) - demonstrates using the built-in Memory Search tool with Azure Foundry Agents.