fix: FS Checkpoint storage special character support (#4730)

The `sessionId`, an optional parameter when starting a new session when
running a workflow is an arbitrary string. This allows consumers to
support whatever ids are needed by other systems, but can result in
errors when an OS special or forbidden character is included.

The fix is to escape the paths, in a 1:1 manner. We rely on
EncodeDataString to do this.

* Also modifies the index file to make it easier to determine what the
  name of the file on disk is for a given `sessionId`.
This commit is contained in:
Jacob Alber
2026-03-25 11:21:30 -04:00
committed by GitHub
Unverified
parent 87962e53c5
commit a9db40886e
3 changed files with 185 additions and 43 deletions
@@ -5,11 +5,14 @@ using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization.Metadata;
using System.Threading;
using System.Threading.Tasks;
namespace Microsoft.Agents.AI.Workflows.Checkpointing;
internal record CheckpointFileIndexEntry(CheckpointInfo CheckpointInfo, string FileName);
/// <summary>
/// Provides a file system-based implementation of a JSON checkpoint store that persists checkpoint data and index
/// information to disk using JSON files.
@@ -28,6 +31,8 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
internal DirectoryInfo Directory { get; }
internal HashSet<CheckpointInfo> CheckpointIndex { get; }
private static JsonTypeInfo<CheckpointFileIndexEntry> EntryTypeInfo => WorkflowsJsonUtilities.JsonContext.Default.CheckpointFileIndexEntry;
/// <summary>
/// Initializes a new instance of the <see cref="FileSystemJsonCheckpointStore"/> class that uses the specified directory
/// </summary>
@@ -64,9 +69,11 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
using StreamReader reader = new(this._indexFile, encoding: Encoding.UTF8, detectEncodingFromByteOrderMarks: false, BufferSize, leaveOpen: true);
while (reader.ReadLine() is string line)
{
if (JsonSerializer.Deserialize(line, KeyTypeInfo) is { } info)
if (JsonSerializer.Deserialize(line, EntryTypeInfo) is { } entry)
{
this.CheckpointIndex.Add(info);
// We never actually use the file names from the index entries since they can be derived from the CheckpointInfo, but it is useful to
// have the UrlEncoded file names in the index file for human readability
this.CheckpointIndex.Add(entry.CheckpointInfo);
}
}
}
@@ -93,8 +100,14 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
}
}
private string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
=> Path.Combine(this.Directory.FullName, $"{sessionId}_{key.CheckpointId}.json");
internal string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
{
string protoPath = $"{sessionId}_{key.CheckpointId}.json";
// Escape the protoPath to ensure it is a valid file name, especially if sessionId or CheckpointId contain path separators, etc.
return Uri.EscapeDataString(protoPath) // This takes care of most of the invalid path characters
.Replace(".", "%2E"); // This takes care of escaping the root folder, since EscapeDataString does not escape dots
}
private CheckpointInfo GetUnusedCheckpointInfo(string sessionId)
{
@@ -116,13 +129,16 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
CheckpointInfo key = this.GetUnusedCheckpointInfo(sessionId);
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
string filePath = Path.Combine(this.Directory.FullName, fileName);
try
{
using Stream checkpointStream = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.None);
using Stream checkpointStream = File.Open(filePath, FileMode.Create, FileAccess.Write, FileShare.None);
using Utf8JsonWriter jsonWriter = new(checkpointStream, new JsonWriterOptions() { Indented = false });
value.WriteTo(jsonWriter);
JsonSerializer.Serialize(this._indexFile!, key, KeyTypeInfo);
CheckpointFileIndexEntry entry = new(key, fileName);
JsonSerializer.Serialize(this._indexFile!, entry, EntryTypeInfo);
byte[] bytes = Encoding.UTF8.GetBytes(Environment.NewLine);
await this._indexFile!.WriteAsync(bytes, 0, bytes.Length, CancellationToken.None).ConfigureAwait(false);
await this._indexFile!.FlushAsync(CancellationToken.None).ConfigureAwait(false);
@@ -136,7 +152,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
try
{
// try to clean up after ourselves
File.Delete(fileName);
File.Delete(filePath);
}
catch { }
@@ -149,6 +165,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
{
this.CheckDisposed();
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
string filePath = Path.Combine(this.Directory.FullName, fileName);
if (!this.CheckpointIndex.Contains(key) ||
!File.Exists(fileName))
@@ -156,7 +173,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
throw new KeyNotFoundException($"Checkpoint '{key.CheckpointId}' not found in store at '{this.Directory.FullName}'.");
}
using FileStream checkpointFileStream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
using FileStream checkpointFileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
using JsonDocument document = await JsonDocument.ParseAsync(checkpointFileStream).ConfigureAwait(false);
return document.RootElement.Clone();
@@ -71,6 +71,7 @@ internal static partial class WorkflowsJsonUtilities
[JsonSerializable(typeof(PortableValue))]
[JsonSerializable(typeof(PortableMessageEnvelope))]
[JsonSerializable(typeof(InMemoryCheckpointManager))]
[JsonSerializable(typeof(CheckpointFileIndexEntry))]
// Runtime State Types
[JsonSerializable(typeof(ScopeKey))]
@@ -9,49 +9,173 @@ using Microsoft.Agents.AI.Workflows.Checkpointing;
namespace Microsoft.Agents.AI.Workflows.UnitTests;
internal sealed class TempDirectory : IDisposable
{
public DirectoryInfo DirectoryInfo { get; }
public TempDirectory()
{
string tempDirPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
this.DirectoryInfo = Directory.CreateDirectory(tempDirPath);
}
public void Dispose()
{
this.DisposeInternal();
GC.SuppressFinalize(this);
}
private void DisposeInternal()
{
if (this.DirectoryInfo.Exists)
{
try
{
// Best efforts
this.DirectoryInfo.Delete(recursive: true);
}
catch { }
}
}
~TempDirectory()
{
// Best efforts
this.DisposeInternal();
}
public static implicit operator DirectoryInfo(TempDirectory tempDirectory) => tempDirectory.DirectoryInfo;
public string FullName => this.DirectoryInfo.FullName;
public bool IsParentOf(FileInfo candidate)
{
if (candidate.Directory is null)
{
return false;
}
if (candidate.Directory.FullName == this.DirectoryInfo.FullName)
{
return true;
}
return this.IsParentOf(candidate.Directory);
}
public bool IsParentOf(DirectoryInfo candidate)
{
while (candidate.Parent is not null)
{
if (candidate.Parent.FullName == this.DirectoryInfo.FullName)
{
return true;
}
candidate = candidate.Parent;
}
return false;
}
}
public sealed class FileSystemJsonCheckpointStoreTests
{
public static JsonElement TestData => JsonSerializer.SerializeToElement(new { test = "data" });
[Fact]
public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync()
{
// Arrange
DirectoryInfo tempDir = new(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()));
FileSystemJsonCheckpointStore? store = null;
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore? store = new(tempDirectory);
try
string runId = Guid.NewGuid().ToString("N");
// Act
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, TestData);
// Assert - Check the file size before disposing to verify data was flushed to disk
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
string indexPath = Path.Combine(tempDirectory.FullName, "index.jsonl");
FileInfo indexFile = new(indexPath);
indexFile.Refresh();
long fileSizeBeforeDispose = indexFile.Length;
// Data should already be on disk (file size > 0) before we dispose
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");
// Dispose to release file lock before final verification
store.Dispose();
string[] lines = File.ReadAllLines(indexPath);
lines.Should().HaveCount(1);
lines[0].Should().Contain(checkpoint.CheckpointId);
}
private async ValueTask Run_EscapeRootFolderTestAsync(string escapingPath)
{
// Arrange
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore store = new(tempDirectory);
string naivePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, escapingPath);
// Check that the naive path is actually outside the temp directory to validate the test is meaningful
FileInfo naiveCheckpointFile = new(naivePath);
tempDirectory.IsParentOf(naiveCheckpointFile).Should().BeFalse("The naive path should be outside the root folder to validate that escaping is necessary.");
// Act
CheckpointInfo checkpointInfo = await store.CreateCheckpointAsync(escapingPath, TestData);
// Assert
string naivePathWithCheckpointId = Path.Combine(tempDirectory.DirectoryInfo.FullName, $"{escapingPath}_{checkpointInfo.CheckpointId}.json");
new FileInfo(naivePathWithCheckpointId).Exists.Should().BeFalse("The naive path should not be used to save a checkpoint file.");
string actualFileName = store.GetFileNameForCheckpoint(escapingPath, checkpointInfo);
string actualFilePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, actualFileName);
FileInfo actualFile = new(actualFilePath);
tempDirectory.IsParentOf(actualFile).Should().BeTrue("The actual checkpoint should be saved inside the root folder.");
actualFile.Exists.Should().BeTrue("The actual path should be used to save a checkpoint file.");
}
[Fact]
public async Task CreateCheckpointAsync_ShouldNotEscapeRootFolderAsync()
{
// The SessionId is used as part of the file name, but if it contains path characters such as /.. it can escape the root folder.
// Testing that such characters are escaped properly to prevent directory traversal attacks, etc.
await this.Run_EscapeRootFolderTestAsync("../valid_suffix");
#if !NETFRAMEWORK
if (OperatingSystem.IsWindows())
{
store = new(tempDir);
string runId = Guid.NewGuid().ToString("N");
JsonElement testData = JsonSerializer.SerializeToElement(new { test = "data" });
// Act
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, testData);
// Assert - Check the file size before disposing to verify data was flushed to disk
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
string indexPath = Path.Combine(tempDir.FullName, "index.jsonl");
FileInfo indexFile = new(indexPath);
indexFile.Refresh();
long fileSizeBeforeDispose = indexFile.Length;
// Data should already be on disk (file size > 0) before we dispose
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");
// Dispose to release file lock before final verification
store.Dispose();
store = null;
string[] lines = File.ReadAllLines(indexPath);
lines.Should().HaveCount(1);
lines[0].Should().Contain(checkpoint.CheckpointId);
}
finally
{
store?.Dispose();
if (tempDir.Exists)
{
tempDir.Delete(recursive: true);
}
// Windows allows both \ and / as path separators, so we test both
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
}
#else
// .NET Framework is always on Windows
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
#endif
}
private const string InvalidPathCharsWin32 = "\\/:*?\"<>|";
private const string InvalidPathCharsUnix = "/";
private const string InvalidPathCharsMacOS = "/:";
[Theory]
[InlineData(InvalidPathCharsWin32)]
[InlineData(InvalidPathCharsUnix)]
[InlineData(InvalidPathCharsMacOS)]
public async Task CreateCheckpointAsync_EscapesInvalidCharsAsync(string invalidChars)
{
// Arrange
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore store = new(tempDirectory);
string runId = $"prefix_{invalidChars}_suffix";
Func<Task> createCheckpointAction = async () => await store.CreateCheckpointAsync(runId, TestData);
await createCheckpointAction.Should().NotThrowAsync();
}
}