From e53e87b39ecf899c37bc0ced3e3c9dc6ad20834f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 19 May 2026 14:08:13 +0000
Subject: [PATCH] Add A2/A3/A4 gap tests; pin GroupChat duplicate-key bug
discovered by A2
Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/153b10f3-2555-45fe-b264-42a2e9100cf2
Co-authored-by: lokitoth <6936551+lokitoth@users.noreply.github.com>
---
.../issue-5350-root-cause-validation-plan.md | 22 +-
...ToolApprovalRequestCheckpointReproTests.cs | 305 ++++++++++++++++++
2 files changed, 315 insertions(+), 12 deletions(-)
diff --git a/docs/working/issue-5350-root-cause-validation-plan.md b/docs/working/issue-5350-root-cause-validation-plan.md
index 61360ba36a..3f1469f50b 100644
--- a/docs/working/issue-5350-root-cause-validation-plan.md
+++ b/docs/working/issue-5350-root-cause-validation-plan.md
@@ -75,19 +75,17 @@ the OP's reported scenario, varying one dimension at a time.
### Track A — Reproduce in a configuration closer to the OP's pattern "B"
-The OP's repro path differs from the new tests in three concrete ways. Test #7 (the
-maximal repro added to this PR) closes the biggest gap — it uses a real
-`ChatClientAgent` + `FunctionInvokingChatClient` + `ApprovalRequiredAIFunction` and
-still passes — but the remaining differences are still worth isolating. Each row below
-is "add a test that reproduces the OP's symptom (`postResume.ToolCall is not
-FunctionCallContent`)".
+The OP's repro path differs from the new tests in three concrete ways. Tests #7
+(maximal repro) and #8–#10 (A2 / A3 / A4 below) close all four gaps. The OP's
+specific hypothesis (`TARC.ToolCall is not FunctionCallContent` after resume) does
+not reproduce in any of them; A2 did however uncover a *separate, unrelated* bug.
-| Step | Variable that changes vs. the passing tests in this repo | Why it matters | Pass criteria |
-|------|--------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|
-| A1 | ~~Use `ChatClientAgent` + `ApprovalRequiredAIFunction` bound into a `WorkflowBuilder`~~ — **covered by test #7 in this PR; passes.** | Was the largest gap to the OP's repro. Closed. | N/A — already passing. |
-| A2 | Multi-agent variant: same as #7 but with the agent inside `GroupChatBuilder` (the OP's actual orchestration), to rule out a group-chat-specific re-wrap or replay path that drops the TARC.ToolCall type. | If group chat re-encodes TARC as part of `ChatMessage.Contents` (`AIContent` polymorphism is two-deep through `ToolApprovalRequestContent`), one branch may resolve and the other not. | Test fails → root cause is in the group-chat message round-trip, not the marshaller. |
-| A3 | Same as #7 but with the JSON `JsonElement` round-tripped through `string` + `JsonDocument.Parse` between commit and retrieve (i.e. emulating the SQL `nvarchar` hop in the OP's Dapper store). | The OP uses Dapper + SQL Server. If the column / driver round-trip preserves ordering, this should be identity-preserving — but if it reorders metadata properties, the `$type` discriminator can be moved out of first position, which then requires `AllowOutOfOrderMetadataProperties = true`. Note `JsonMarshaller` only propagates that one flag from the user's `customOptions`. | Test fails when ordering is permuted before deserialization. Confirms the storage layer is reordering metadata. |
-| A4 | Same as #7 but with a non-default `JsonSerializerOptions` passed as `customOptions` to `CheckpointManager.CreateJson`, where the user-supplied options do **not** include the polymorphism resolver and `JsonMarshaller`'s `LookupTypeInfo` falls back to them for some type. | `JsonMarshaller.LookupTypeInfo` only goes to the external options when the internal chain doesn't know about the type. For most cases this won't trigger, but it's worth confirming that supplying a custom `JsonSerializerOptions` does not silently displace the internal chain. | Either the external options are never consulted for `AIContent` (good), or there is a sneak path where they are (regression). |
+| Step | Variable that changes vs. the passing tests in this repo | Why it matters | Outcome |
+|------|--------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|
+| A1 | ~~Use `ChatClientAgent` + `ApprovalRequiredAIFunction` bound into a `WorkflowBuilder`~~ — **covered by test #7 in this PR; passes.** | Was the largest gap to the OP's repro. Closed. | OP hypothesis disproved — `TARC.ToolCall is FunctionCallContent` post-resume, and the tool is actually invoked exactly once when the approval response is sent. |
+| A2 | Multi-agent variant: same as #7 but with the agent inside a `GroupChatBuilder` (the OP's actual orchestration) with `RoundRobinGroupChatManager`. | If group chat re-encodes TARC as part of `ChatMessage.Contents` (`AIContent` polymorphism is two-deep through `ToolApprovalRequestContent`), one branch may resolve and the other not. | OP hypothesis disproved — `TARC.ToolCall is FunctionCallContent` post-resume. **But:** sending the approval response surfaces a *different* real bug — `FunctionInvokingChatClient.ExtractAndRemoveApprovalRequestsAndResponses` throws `ArgumentException: An item with the same key has already been added. Key: ficc_call-1`. Pinned by test as documented misbehavior. |
+| A3 | Same as #7 but with the checkpoint `JsonElement` round-tripped through `string` + `JsonDocument.Parse` between commit and retrieve (`StringRoundTripJsonStore`), emulating the SQL `nvarchar` hop in the OP's Dapper store. | The OP uses Dapper + SQL Server. If the column / driver round-trip preserves ordering, this should be identity-preserving — but if it reorders metadata properties, the `$type` discriminator can be moved out of first position, which then requires `AllowOutOfOrderMetadataProperties = true`. | OP hypothesis disproved — byte-preserving string round-trip is identity-preserving for the relevant payload; `TARC.ToolCall is FunctionCallContent` post-resume; tool invoked exactly once. The OP's storage layer would have to *perturb* the JSON (e.g. reorder metadata) for this to reproduce. |
+| A4 | Same as #7 but with non-default `JsonSerializerOptions` (`JsonSerializerDefaults.Web`, no AIJsonUtilities resolver) passed as `customOptions` to `CheckpointManager.CreateJson`. | `JsonMarshaller.LookupTypeInfo` only goes to the external options when the internal chain doesn't know about the type. For most cases this won't trigger, but it's worth confirming that supplying a custom `JsonSerializerOptions` does not silently displace the internal chain. | OP hypothesis disproved — custom external options that DO NOT know about `AIContent` types are correctly ignored for known types; the internal `WorkflowsJsonUtilities.DefaultOptions` chain wins. `TARC.ToolCall is FunctionCallContent` post-resume; tool invoked exactly once. |
### Track B — Validate the wire format the OP actually persists
diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs
index 331c3cc798..a3a2b2ce9b 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs
@@ -485,4 +485,309 @@ public class ToolApprovalRequestCheckpointReproTests
public object? GetService(Type serviceType, object? serviceKey = null) => null;
public void Dispose() { }
}
+
+ ///
+ /// Track A2 — same as the maximal repro (test #7) but with the agent inside a
+ /// GroupChatBuilder (round-robin manager with a single participant), to rule out
+ /// a group-chat-specific re-wrap or replay path that drops the
+ /// type during the checkpoint round-trip.
+ ///
+ /// Finding: the type IS preserved after
+ /// resume (consistent with tests #1 – #7 — the OP's hypothesis still does not reproduce
+ /// in this configuration). However, sending an approval response back into the resumed
+ /// group-chat workflow surfaces a different, real bug:
+ /// FunctionInvokingChatClient.ExtractAndRemoveApprovalRequestsAndResponses throws
+ /// with message
+ /// "An item with the same key has already been added. Key: ficc_call-1".
+ /// This test pins that observed behavior so future investigation can latch onto it.
+ ///
+ [Fact]
+ public async Task Repro_5350_A2_GroupChatBuilder_WithApprovalRequiredTool_JsonCheckpointResume_PreservesFunctionCallContentAsync()
+ {
+ ReproHarness harness = new();
+ Workflow workflow = AgentWorkflowBuilder
+ .CreateGroupChatBuilderWith(agents => new RoundRobinGroupChatManager(agents) { MaximumIterationCount = 4 })
+ .AddParticipants(harness.Agent)
+ .Build();
+
+ await RunReproAsync(
+ workflow,
+ harness,
+ CheckpointManager.CreateJson(new InMemoryJsonStore()),
+ scenarioName: "A2 (group chat)",
+ expectCleanPostApprovalCompletion: false,
+ postApprovalCompletionAssertion: events =>
+ {
+ // A2 currently surfaces a duplicate-key crash inside FICC when the group-chat
+ // workflow resumes with a pending approval request and the response arrives.
+ // This assertion pins the observed misbehavior. If the upstream
+ // GroupChat + AIAgentHostExecutor + FICC interaction is fixed, this test will
+ // start failing here — that's intentional. The fix should then either remove
+ // this assertion or convert it to a clean-completion assertion mirroring
+ // test #7. TODO(#5350-followup): file a separate issue for the duplicate-key
+ // crash if one does not already exist, and link it here.
+ ArgumentException? duplicateKey = events
+ .OfType()
+ .Select(e => e.Data as Exception)
+ .Select(e => e?.InnerException as ArgumentException ?? e as ArgumentException)
+ .FirstOrDefault(e => e?.Message.Contains("ficc_call-1", StringComparison.Ordinal) == true);
+ duplicateKey.Should().NotBeNull(
+ "[A2 (group chat)] approving the resumed approval request currently surfaces a duplicate-key " +
+ "ArgumentException out of FunctionInvokingChatClient.ExtractAndRemoveApprovalRequestsAndResponses — " +
+ "this is a real bug, but it is NOT the bug claimed in issue #5350. The TARC.ToolCall was already " +
+ "asserted to be a FunctionCallContent above; the OP's hypothesis remains unreproduced.");
+ });
+ }
+
+ ///
+ /// Track A3 — same as the maximal repro (test #7) but the checkpoint
+ /// is round-tripped through +
+ /// between commit and retrieve, to emulate the SQL nvarchar / Dapper hop in the
+ /// OP's setup. This catches cases where the storage layer would only fail if it perturbed
+ /// the JSON in some way (e.g. dropped metadata, re-encoded numbers), and confirms a
+ /// byte-preserving string round-trip on its own is harmless.
+ ///
+ [Fact]
+ public async Task Repro_5350_A3_StringRoundTripStore_PreservesFunctionCallContentAsync()
+ {
+ ReproHarness harness = new();
+ Workflow workflow = new WorkflowBuilder(harness.Agent).Build();
+
+ CheckpointManager checkpointManager = CheckpointManager.CreateJson(
+ new StringRoundTripJsonStore(new InMemoryJsonStore()));
+
+ await RunReproAsync(
+ workflow,
+ harness,
+ checkpointManager,
+ scenarioName: "A3 (string round-trip store)");
+ }
+
+ ///
+ /// Track A4 — same as the maximal repro (test #7) but with a non-default
+ /// passed as customOptions to
+ /// . The custom options intentionally do NOT
+ /// include the polymorphism resolver chain (AgentAbstractionsJsonUtilities →
+ /// AIJsonUtilities) — confirming that JsonMarshaller's internal
+ /// WorkflowsJsonUtilities.DefaultOptions chain always wins for known
+ /// types and the external options cannot silently displace it.
+ ///
+ [Fact]
+ public async Task Repro_5350_A4_CustomJsonSerializerOptions_PreservesFunctionCallContentAsync()
+ {
+ ReproHarness harness = new();
+ Workflow workflow = new WorkflowBuilder(harness.Agent).Build();
+
+ // Custom options that DO NOT know about AIContent / FunctionCallContent.
+ JsonSerializerOptions customOptions = new(JsonSerializerDefaults.Web)
+ {
+ WriteIndented = true,
+ };
+
+ CheckpointManager checkpointManager = CheckpointManager.CreateJson(
+ new InMemoryJsonStore(),
+ customOptions);
+
+ await RunReproAsync(
+ workflow,
+ harness,
+ checkpointManager,
+ scenarioName: "A4 (custom JsonSerializerOptions)");
+ }
+
+ ///
+ /// Shared end-to-end repro driver used by tests #7, A2, A3, A4.
+ /// Drives the workflow until an approval request appears, captures the latest checkpoint,
+ /// disposes the run, resumes from the checkpoint, and asserts that the resumed
+ /// 's is
+ /// still a . Then sends an approval response and asserts
+ /// the wrapped is invoked exactly once.
+ ///
+ private static async Task RunReproAsync(
+ Workflow workflow,
+ ReproHarness harness,
+ CheckpointManager checkpointManager,
+ string scenarioName,
+ bool expectCleanPostApprovalCompletion = true,
+ Action>? postApprovalCompletionAssertion = null)
+ {
+ InProcessExecutionEnvironment env = InProcessExecution.OffThread;
+ List inputMessages = [new(ChatRole.User, "What's the weather in Amsterdam?")];
+
+ // Act 1 — run until approval request appears and capture latest checkpoint.
+ ExternalRequest? firstRunRequest = null;
+ CheckpointInfo? checkpoint = null;
+
+ await using (StreamingRun firstRun = await env.WithCheckpointing(checkpointManager)
+ .RunStreamingAsync(workflow, inputMessages))
+ {
+ (await firstRun.TrySendMessageAsync(new TurnToken(emitEvents: false)))
+ .Should().BeTrue($"[{scenarioName}] the workflow should accept a TurnToken");
+
+ using CancellationTokenSource cts = new(TimeSpan.FromSeconds(30));
+ await foreach (WorkflowEvent evt in firstRun.WatchStreamAsync(blockOnPendingRequest: false, cts.Token))
+ {
+ if (evt is RequestInfoEvent requestInfo)
+ {
+ firstRunRequest ??= requestInfo.Request;
+ }
+
+ if (evt is SuperStepCompletedEvent step && step.CompletionInfo?.Checkpoint is { } cp)
+ {
+ checkpoint = cp;
+ }
+ }
+ }
+
+ firstRunRequest.Should().NotBeNull(
+ $"[{scenarioName}] the ChatClientAgent + FICC pipeline should surface the approval request as a workflow RequestInfoEvent");
+ checkpoint.Should().NotBeNull(
+ $"[{scenarioName}] a checkpoint should have been produced while the approval request was pending");
+ harness.ChatCallCount.Should().Be(1, $"[{scenarioName}] the mock chat client should have been called exactly once before approval was requested");
+ harness.InvocationCount.Should().Be(0, $"[{scenarioName}] the underlying tool must NOT have been invoked before approval was granted");
+
+ ToolApprovalRequestContent? preCheckpoint = firstRunRequest!.Data.As();
+ preCheckpoint.Should().NotBeNull($"[{scenarioName}] the pending external request should carry a ToolApprovalRequestContent payload");
+ preCheckpoint!.ToolCall.Should().BeOfType(
+ $"[{scenarioName}] the pre-checkpoint pending request payload must already be a FunctionCallContent");
+
+ // Act 2 — resume from checkpoint on a fresh handle.
+ ExternalRequest? resumedRequest = null;
+ List postResumeEvents = [];
+
+ await using (StreamingRun resumed = await env.WithCheckpointing(checkpointManager)
+ .ResumeStreamingAsync(workflow, checkpoint!))
+ {
+ using CancellationTokenSource cts = new(TimeSpan.FromSeconds(30));
+ await foreach (WorkflowEvent evt in resumed.WatchStreamAsync(blockOnPendingRequest: false, cts.Token))
+ {
+ if (evt is RequestInfoEvent requestInfo)
+ {
+ resumedRequest ??= requestInfo.Request;
+ }
+ }
+
+ resumedRequest.Should().NotBeNull(
+ $"[{scenarioName}] the resumed workflow should re-emit the pending approval RequestInfoEvent");
+
+ ToolApprovalRequestContent? postResume = resumedRequest!.Data.As();
+ postResume.Should().NotBeNull(
+ $"[{scenarioName}] ExternalRequest.Data.As() should materialize the payload after JSON-checkpoint resume");
+ postResume!.ToolCall.Should().NotBeNull($"[{scenarioName}] the resumed TARC must carry its ToolCall");
+ postResume.ToolCall.Should().BeOfType(
+ $"[{scenarioName}] after CheckpointManager.CreateJson round-trip via ResumeStreamingAsync, " +
+ "ToolApprovalRequestContent.ToolCall must still be a FunctionCallContent so that " +
+ "FunctionInvokingChatClient's pattern match (`tarc.ToolCall is FunctionCallContent`) continues to fire (issue #5350).");
+
+ // Act 3 — approve and finish the workflow.
+ ToolApprovalResponseContent approvalResponse = postResume.CreateResponse(approved: true);
+ await resumed.SendResponseAsync(resumedRequest.CreateResponse(approvalResponse));
+
+ using CancellationTokenSource cts2 = new(TimeSpan.FromSeconds(30));
+ await foreach (WorkflowEvent evt in resumed.WatchStreamAsync(blockOnPendingRequest: false, cts2.Token))
+ {
+ postResumeEvents.Add(evt);
+ }
+ }
+
+ if (expectCleanPostApprovalCompletion)
+ {
+ harness.InvocationCount.Should().Be(1,
+ $"[{scenarioName}] approving the request should cause FunctionInvokingChatClient to invoke the wrapped AIFunction exactly once");
+ postResumeEvents.OfType().Should().BeEmpty(
+ $"[{scenarioName}] no workflow errors should be raised when responding to the resumed approval request");
+ postResumeEvents.OfType().Should().BeEmpty(
+ $"[{scenarioName}] no executor failures should be raised when responding to the resumed approval request");
+ }
+
+ postApprovalCompletionAssertion?.Invoke(postResumeEvents);
+ }
+
+ ///
+ /// Bundles a + counting
+ /// + in a single reusable harness so the four end-to-end repro
+ /// variants (#7, A2, A3, A4) can share identical setup.
+ ///
+ private sealed class ReproHarness
+ {
+ public const string ToolName = "GetWeather";
+ public const string ToolResultText = "Sunny, 22°C";
+ public const string ToolCallId = "call-1";
+ public const string FinalAssistantText = "The weather in Amsterdam is sunny and 22°C.";
+
+ private int _invocationCount;
+ private int _chatCallIndex;
+
+ public int InvocationCount => Volatile.Read(ref this._invocationCount);
+ public int ChatCallCount => Volatile.Read(ref this._chatCallIndex);
+
+ public ChatClientAgent Agent { get; }
+
+ public ReproHarness()
+ {
+ AIFunction underlyingTool = AIFunctionFactory.Create(
+ ([Description("City to look up")] string city) =>
+ {
+ Interlocked.Increment(ref this._invocationCount);
+ return ToolResultText;
+ },
+ name: ToolName,
+ description: "Gets the weather for the given city");
+
+ ApprovalRequiredAIFunction approvalTool = new(underlyingTool);
+
+ MockChatClient mockChatClient = new((messages, options) =>
+ {
+ int index = Interlocked.Increment(ref this._chatCallIndex) - 1;
+ return index switch
+ {
+ 0 => new ChatResponse(new ChatMessage(ChatRole.Assistant,
+ [new FunctionCallContent(
+ callId: ToolCallId,
+ name: ToolName,
+ arguments: new Dictionary { ["city"] = "Amsterdam" })])),
+ _ => new ChatResponse(new ChatMessage(ChatRole.Assistant, FinalAssistantText)),
+ };
+ });
+
+ this.Agent = new ChatClientAgent(
+ mockChatClient,
+ instructions: "You are a weather agent.",
+ name: "WeatherAgent",
+ tools: [approvalTool]);
+ }
+ }
+
+ ///
+ /// Wraps a and forces every to
+ /// round-trip through GetRawText() +
+ /// during commit and again during retrieve. This emulates a Dapper-backed SQL store where
+ /// the is materialized to a string for the nvarchar(max)
+ /// column on the way in and re-parsed on the way back out. Used by the A3 test only;
+ /// the double parse-and-clone is intentionally extra work and not suitable for production use.
+ ///
+ private sealed class StringRoundTripJsonStore(JsonCheckpointStore inner) : JsonCheckpointStore
+ {
+ public override async ValueTask CreateCheckpointAsync(string sessionId, JsonElement value, CheckpointInfo? parent = null)
+ {
+ JsonElement roundTripped = RoundTrip(value);
+ return await inner.CreateCheckpointAsync(sessionId, roundTripped, parent).ConfigureAwait(false);
+ }
+
+ public override async ValueTask RetrieveCheckpointAsync(string sessionId, CheckpointInfo key)
+ {
+ JsonElement raw = await inner.RetrieveCheckpointAsync(sessionId, key).ConfigureAwait(false);
+ return RoundTrip(raw);
+ }
+
+ public override ValueTask> RetrieveIndexAsync(string sessionId, CheckpointInfo? withParent = null)
+ => inner.RetrieveIndexAsync(sessionId, withParent);
+
+ private static JsonElement RoundTrip(JsonElement element)
+ {
+ string raw = element.GetRawText();
+ using JsonDocument doc = JsonDocument.Parse(raw);
+ return doc.RootElement.Clone();
+ }
+ }
}