mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
fix: FS Checkpoint storage special character support (#4730)
The `sessionId`, an optional parameter when starting a new session when running a workflow is an arbitrary string. This allows consumers to support whatever ids are needed by other systems, but can result in errors when an OS special or forbidden character is included. The fix is to escape the paths, in a 1:1 manner. We rely on EncodeDataString to do this. * Also modifies the index file to make it easier to determine what the name of the file on disk is for a given `sessionId`.
This commit is contained in:
committed by
GitHub
Unverified
parent
87962e53c5
commit
a9db40886e
+25
-8
@@ -5,11 +5,14 @@ using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Microsoft.Agents.AI.Workflows.Checkpointing;
|
||||
|
||||
internal record CheckpointFileIndexEntry(CheckpointInfo CheckpointInfo, string FileName);
|
||||
|
||||
/// <summary>
|
||||
/// Provides a file system-based implementation of a JSON checkpoint store that persists checkpoint data and index
|
||||
/// information to disk using JSON files.
|
||||
@@ -28,6 +31,8 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
internal DirectoryInfo Directory { get; }
|
||||
internal HashSet<CheckpointInfo> CheckpointIndex { get; }
|
||||
|
||||
private static JsonTypeInfo<CheckpointFileIndexEntry> EntryTypeInfo => WorkflowsJsonUtilities.JsonContext.Default.CheckpointFileIndexEntry;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="FileSystemJsonCheckpointStore"/> class that uses the specified directory
|
||||
/// </summary>
|
||||
@@ -64,9 +69,11 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
using StreamReader reader = new(this._indexFile, encoding: Encoding.UTF8, detectEncodingFromByteOrderMarks: false, BufferSize, leaveOpen: true);
|
||||
while (reader.ReadLine() is string line)
|
||||
{
|
||||
if (JsonSerializer.Deserialize(line, KeyTypeInfo) is { } info)
|
||||
if (JsonSerializer.Deserialize(line, EntryTypeInfo) is { } entry)
|
||||
{
|
||||
this.CheckpointIndex.Add(info);
|
||||
// We never actually use the file names from the index entries since they can be derived from the CheckpointInfo, but it is useful to
|
||||
// have the UrlEncoded file names in the index file for human readability
|
||||
this.CheckpointIndex.Add(entry.CheckpointInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -93,8 +100,14 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
}
|
||||
}
|
||||
|
||||
private string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
|
||||
=> Path.Combine(this.Directory.FullName, $"{sessionId}_{key.CheckpointId}.json");
|
||||
internal string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
|
||||
{
|
||||
string protoPath = $"{sessionId}_{key.CheckpointId}.json";
|
||||
|
||||
// Escape the protoPath to ensure it is a valid file name, especially if sessionId or CheckpointId contain path separators, etc.
|
||||
return Uri.EscapeDataString(protoPath) // This takes care of most of the invalid path characters
|
||||
.Replace(".", "%2E"); // This takes care of escaping the root folder, since EscapeDataString does not escape dots
|
||||
}
|
||||
|
||||
private CheckpointInfo GetUnusedCheckpointInfo(string sessionId)
|
||||
{
|
||||
@@ -116,13 +129,16 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
|
||||
CheckpointInfo key = this.GetUnusedCheckpointInfo(sessionId);
|
||||
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
|
||||
string filePath = Path.Combine(this.Directory.FullName, fileName);
|
||||
|
||||
try
|
||||
{
|
||||
using Stream checkpointStream = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.None);
|
||||
using Stream checkpointStream = File.Open(filePath, FileMode.Create, FileAccess.Write, FileShare.None);
|
||||
using Utf8JsonWriter jsonWriter = new(checkpointStream, new JsonWriterOptions() { Indented = false });
|
||||
value.WriteTo(jsonWriter);
|
||||
|
||||
JsonSerializer.Serialize(this._indexFile!, key, KeyTypeInfo);
|
||||
CheckpointFileIndexEntry entry = new(key, fileName);
|
||||
JsonSerializer.Serialize(this._indexFile!, entry, EntryTypeInfo);
|
||||
byte[] bytes = Encoding.UTF8.GetBytes(Environment.NewLine);
|
||||
await this._indexFile!.WriteAsync(bytes, 0, bytes.Length, CancellationToken.None).ConfigureAwait(false);
|
||||
await this._indexFile!.FlushAsync(CancellationToken.None).ConfigureAwait(false);
|
||||
@@ -136,7 +152,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
try
|
||||
{
|
||||
// try to clean up after ourselves
|
||||
File.Delete(fileName);
|
||||
File.Delete(filePath);
|
||||
}
|
||||
catch { }
|
||||
|
||||
@@ -149,6 +165,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
{
|
||||
this.CheckDisposed();
|
||||
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
|
||||
string filePath = Path.Combine(this.Directory.FullName, fileName);
|
||||
|
||||
if (!this.CheckpointIndex.Contains(key) ||
|
||||
!File.Exists(fileName))
|
||||
@@ -156,7 +173,7 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
|
||||
throw new KeyNotFoundException($"Checkpoint '{key.CheckpointId}' not found in store at '{this.Directory.FullName}'.");
|
||||
}
|
||||
|
||||
using FileStream checkpointFileStream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
|
||||
using FileStream checkpointFileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
|
||||
using JsonDocument document = await JsonDocument.ParseAsync(checkpointFileStream).ConfigureAwait(false);
|
||||
|
||||
return document.RootElement.Clone();
|
||||
|
||||
@@ -71,6 +71,7 @@ internal static partial class WorkflowsJsonUtilities
|
||||
[JsonSerializable(typeof(PortableValue))]
|
||||
[JsonSerializable(typeof(PortableMessageEnvelope))]
|
||||
[JsonSerializable(typeof(InMemoryCheckpointManager))]
|
||||
[JsonSerializable(typeof(CheckpointFileIndexEntry))]
|
||||
|
||||
// Runtime State Types
|
||||
[JsonSerializable(typeof(ScopeKey))]
|
||||
|
||||
+159
-35
@@ -9,49 +9,173 @@ using Microsoft.Agents.AI.Workflows.Checkpointing;
|
||||
|
||||
namespace Microsoft.Agents.AI.Workflows.UnitTests;
|
||||
|
||||
internal sealed class TempDirectory : IDisposable
|
||||
{
|
||||
public DirectoryInfo DirectoryInfo { get; }
|
||||
|
||||
public TempDirectory()
|
||||
{
|
||||
string tempDirPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
|
||||
this.DirectoryInfo = Directory.CreateDirectory(tempDirPath);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
this.DisposeInternal();
|
||||
GC.SuppressFinalize(this);
|
||||
}
|
||||
|
||||
private void DisposeInternal()
|
||||
{
|
||||
if (this.DirectoryInfo.Exists)
|
||||
{
|
||||
try
|
||||
{
|
||||
// Best efforts
|
||||
this.DirectoryInfo.Delete(recursive: true);
|
||||
}
|
||||
catch { }
|
||||
}
|
||||
}
|
||||
|
||||
~TempDirectory()
|
||||
{
|
||||
// Best efforts
|
||||
this.DisposeInternal();
|
||||
}
|
||||
|
||||
public static implicit operator DirectoryInfo(TempDirectory tempDirectory) => tempDirectory.DirectoryInfo;
|
||||
|
||||
public string FullName => this.DirectoryInfo.FullName;
|
||||
|
||||
public bool IsParentOf(FileInfo candidate)
|
||||
{
|
||||
if (candidate.Directory is null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (candidate.Directory.FullName == this.DirectoryInfo.FullName)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return this.IsParentOf(candidate.Directory);
|
||||
}
|
||||
|
||||
public bool IsParentOf(DirectoryInfo candidate)
|
||||
{
|
||||
while (candidate.Parent is not null)
|
||||
{
|
||||
if (candidate.Parent.FullName == this.DirectoryInfo.FullName)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
candidate = candidate.Parent;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public sealed class FileSystemJsonCheckpointStoreTests
|
||||
{
|
||||
public static JsonElement TestData => JsonSerializer.SerializeToElement(new { test = "data" });
|
||||
|
||||
[Fact]
|
||||
public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync()
|
||||
{
|
||||
// Arrange
|
||||
DirectoryInfo tempDir = new(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()));
|
||||
FileSystemJsonCheckpointStore? store = null;
|
||||
using TempDirectory tempDirectory = new();
|
||||
using FileSystemJsonCheckpointStore? store = new(tempDirectory);
|
||||
|
||||
try
|
||||
string runId = Guid.NewGuid().ToString("N");
|
||||
|
||||
// Act
|
||||
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, TestData);
|
||||
|
||||
// Assert - Check the file size before disposing to verify data was flushed to disk
|
||||
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
|
||||
string indexPath = Path.Combine(tempDirectory.FullName, "index.jsonl");
|
||||
FileInfo indexFile = new(indexPath);
|
||||
indexFile.Refresh();
|
||||
long fileSizeBeforeDispose = indexFile.Length;
|
||||
|
||||
// Data should already be on disk (file size > 0) before we dispose
|
||||
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");
|
||||
|
||||
// Dispose to release file lock before final verification
|
||||
store.Dispose();
|
||||
|
||||
string[] lines = File.ReadAllLines(indexPath);
|
||||
lines.Should().HaveCount(1);
|
||||
lines[0].Should().Contain(checkpoint.CheckpointId);
|
||||
}
|
||||
|
||||
private async ValueTask Run_EscapeRootFolderTestAsync(string escapingPath)
|
||||
{
|
||||
// Arrange
|
||||
using TempDirectory tempDirectory = new();
|
||||
using FileSystemJsonCheckpointStore store = new(tempDirectory);
|
||||
|
||||
string naivePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, escapingPath);
|
||||
|
||||
// Check that the naive path is actually outside the temp directory to validate the test is meaningful
|
||||
FileInfo naiveCheckpointFile = new(naivePath);
|
||||
tempDirectory.IsParentOf(naiveCheckpointFile).Should().BeFalse("The naive path should be outside the root folder to validate that escaping is necessary.");
|
||||
|
||||
// Act
|
||||
CheckpointInfo checkpointInfo = await store.CreateCheckpointAsync(escapingPath, TestData);
|
||||
|
||||
// Assert
|
||||
string naivePathWithCheckpointId = Path.Combine(tempDirectory.DirectoryInfo.FullName, $"{escapingPath}_{checkpointInfo.CheckpointId}.json");
|
||||
new FileInfo(naivePathWithCheckpointId).Exists.Should().BeFalse("The naive path should not be used to save a checkpoint file.");
|
||||
|
||||
string actualFileName = store.GetFileNameForCheckpoint(escapingPath, checkpointInfo);
|
||||
string actualFilePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, actualFileName);
|
||||
FileInfo actualFile = new(actualFilePath);
|
||||
|
||||
tempDirectory.IsParentOf(actualFile).Should().BeTrue("The actual checkpoint should be saved inside the root folder.");
|
||||
actualFile.Exists.Should().BeTrue("The actual path should be used to save a checkpoint file.");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CreateCheckpointAsync_ShouldNotEscapeRootFolderAsync()
|
||||
{
|
||||
// The SessionId is used as part of the file name, but if it contains path characters such as /.. it can escape the root folder.
|
||||
// Testing that such characters are escaped properly to prevent directory traversal attacks, etc.
|
||||
|
||||
await this.Run_EscapeRootFolderTestAsync("../valid_suffix");
|
||||
|
||||
#if !NETFRAMEWORK
|
||||
if (OperatingSystem.IsWindows())
|
||||
{
|
||||
store = new(tempDir);
|
||||
string runId = Guid.NewGuid().ToString("N");
|
||||
JsonElement testData = JsonSerializer.SerializeToElement(new { test = "data" });
|
||||
|
||||
// Act
|
||||
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, testData);
|
||||
|
||||
// Assert - Check the file size before disposing to verify data was flushed to disk
|
||||
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
|
||||
string indexPath = Path.Combine(tempDir.FullName, "index.jsonl");
|
||||
FileInfo indexFile = new(indexPath);
|
||||
indexFile.Refresh();
|
||||
long fileSizeBeforeDispose = indexFile.Length;
|
||||
|
||||
// Data should already be on disk (file size > 0) before we dispose
|
||||
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");
|
||||
|
||||
// Dispose to release file lock before final verification
|
||||
store.Dispose();
|
||||
store = null;
|
||||
|
||||
string[] lines = File.ReadAllLines(indexPath);
|
||||
lines.Should().HaveCount(1);
|
||||
lines[0].Should().Contain(checkpoint.CheckpointId);
|
||||
}
|
||||
finally
|
||||
{
|
||||
store?.Dispose();
|
||||
if (tempDir.Exists)
|
||||
{
|
||||
tempDir.Delete(recursive: true);
|
||||
}
|
||||
// Windows allows both \ and / as path separators, so we test both
|
||||
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
|
||||
}
|
||||
#else
|
||||
// .NET Framework is always on Windows
|
||||
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
|
||||
#endif
|
||||
}
|
||||
|
||||
private const string InvalidPathCharsWin32 = "\\/:*?\"<>|";
|
||||
private const string InvalidPathCharsUnix = "/";
|
||||
private const string InvalidPathCharsMacOS = "/:";
|
||||
|
||||
[Theory]
|
||||
[InlineData(InvalidPathCharsWin32)]
|
||||
[InlineData(InvalidPathCharsUnix)]
|
||||
[InlineData(InvalidPathCharsMacOS)]
|
||||
public async Task CreateCheckpointAsync_EscapesInvalidCharsAsync(string invalidChars)
|
||||
{
|
||||
// Arrange
|
||||
using TempDirectory tempDirectory = new();
|
||||
using FileSystemJsonCheckpointStore store = new(tempDirectory);
|
||||
|
||||
string runId = $"prefix_{invalidChars}_suffix";
|
||||
|
||||
Func<Task> createCheckpointAction = async () => await store.CreateCheckpointAsync(runId, TestData);
|
||||
await createCheckpointAction.Should().NotThrowAsync();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user