From 8d503c03aa84cdeb1177fe18f4fe4bee3117aa0d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 19 May 2026 13:53:09 +0000 Subject: [PATCH] Add maximal E2E ChatClientAgent + ApprovalRequiredAIFunction repro for #5350 Agent-Logs-Url: https://github.com/microsoft/agent-framework/sessions/baf2578f-bdc5-479b-8ebc-1b198f26c151 Co-authored-by: lokitoth <6936551+lokitoth@users.noreply.github.com> --- .../issue-5350-root-cause-validation-plan.md | 37 ++- ...ToolApprovalRequestCheckpointReproTests.cs | 232 ++++++++++++++++++ 2 files changed, 257 insertions(+), 12 deletions(-) diff --git a/docs/working/issue-5350-root-cause-validation-plan.md b/docs/working/issue-5350-root-cause-validation-plan.md index 530d681827..61360ba36a 100644 --- a/docs/working/issue-5350-root-cause-validation-plan.md +++ b/docs/working/issue-5350-root-cause-validation-plan.md @@ -4,9 +4,12 @@ **Status of repro:** A focused repro test class `dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs` -was added covering six progressively more end-to-end variants of the path the issue -describes. **All six tests pass** on `main`, i.e. *the bug as described does not -reproduce* at any of the layers exercised here: +was added covering seven progressively more end-to-end variants of the path the issue +describes — including a **maximal** end-to-end test that uses a real +`ChatClientAgent` driven by `FunctionInvokingChatClient` with an +`ApprovalRequiredAIFunction`. **All seven tests pass** on `main`, consistently across +5 back-to-back runs, i.e. *the bug as described does not reproduce* at any of the +layers exercised here: | # | Test | What it exercises | Result | |---|-------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------| @@ -16,6 +19,7 @@ reproduce* at any of the layers exercised here: | 4 | `Repro_5350_DirectJsonMarshallerRoundtrip_IsDeterministic` | 25× repetition of #1 to rule out flakiness / JIT order | Pass | | 5 | `Repro_5350_CaptureWireFormat_ForInspection` | Captures the on-the-wire shape so we can compare against the OP's SQL row | Pass | | 6 | `Repro_5350_EndToEnd_JsonCheckpointResume_PreservesFunctionCallContentAsync` | Full `CheckpointManager.CreateJson(InMemoryJsonStore) → RunStreamingAsync → SuperStep checkpoint → ResumeStreamingAsync` cycle with a `RequestPort` | Pass | +| 7 | `Repro_5350_EndToEnd_ChatClientAgent_WithApprovalRequiredTool_JsonCheckpointResume_PreservesFunctionCallContentAndInvokesToolAsync` | **Maximal**: `ChatClientAgent` over a `MockChatClient` with an `ApprovalRequiredAIFunction`, single-agent `WorkflowBuilder` (no orchestration), `CheckpointManager.CreateJson(InMemoryJsonStore)`. Asserts both that the resumed `RequestInfoEvent.Request` still carries a `FunctionCallContent` AND that approving the request actually invokes the underlying `AIFunction` and lets the workflow continue. | Pass | This is **consistent with [@lokitoth's second comment](https://github.com/microsoft/agent-framework/issues/5350#issuecomment-4379664401)**: @@ -71,16 +75,19 @@ the OP's reported scenario, varying one dimension at a time. ### Track A — Reproduce in a configuration closer to the OP's pattern "B" -The OP's repro path differs from the new tests in three concrete ways. Each is a -candidate root cause; the test matrix below isolates them. Each row is "add a -test that reproduces the OP's symptom (`postResume.ToolCall is not FunctionCallContent`)". +The OP's repro path differs from the new tests in three concrete ways. Test #7 (the +maximal repro added to this PR) closes the biggest gap — it uses a real +`ChatClientAgent` + `FunctionInvokingChatClient` + `ApprovalRequiredAIFunction` and +still passes — but the remaining differences are still worth isolating. Each row below +is "add a test that reproduces the OP's symptom (`postResume.ToolCall is not +FunctionCallContent`)". | Step | Variable that changes vs. the passing tests in this repo | Why it matters | Pass criteria | |------|--------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------| -| A1 | Use `ChatClientAgent` + `ApprovalRequiredAIFunction` bound into a `WorkflowBuilder` (the GroupChatToolApproval "pattern B" path), with a fake `IChatClient` that emits a single `FunctionCallContent`. | This is the only major piece of the OP's setup that the current tests do not exercise. The TARC in this path is *generated* by `FunctionInvokingChatClient` and flows through the `AIAgentHostExecutor`. | Test fails (TARC.ToolCall comes back as base type) → root cause is in the agent-host/FICC bridge, not the marshaller. | -| A2 | Same as A1 but with the request payload also surfaced as part of a `ChatMessage.Contents`-style transport (whatever the agent host actually serializes through). Inspect the on-the-wire JSON for the inner `toolCall` and look for an absent or differently-named `$type`. | If the TARC is round-tripped as `AIContent`/`ChatMessage` instead of as itself, the polymorphism is two-deep (`AIContent` → TARC, TARC → ToolCall). It is plausible that one branch resolves but the other doesn't. | Find missing `$type` in serialized payload. | -| A3 | Same as the passing test #6 in this file, but with the SQL Server-style store that round-trips the `JsonElement` through `string` and back (e.g. `element.GetRawText()` → `JsonDocument.Parse`). | The OP uses Dapper + SQL Server. If the column is `nvarchar` and the round-trip preserves ordering, this should be identity-preserving — but if the OP uses `jsonb`-like storage that reorders metadata properties, the `$type` discriminator can be moved out of "first" position, which then requires `AllowOutOfOrderMetadataProperties = true`. Note `JsonMarshaller` only propagates that one flag from the user's `customOptions`. | Test fails when ordering is permuted before deserialization. Confirms the storage layer is reordering metadata. | -| A4 | Same as #6 but with a non-default `JsonSerializerOptions` passed as `customOptions` to `CheckpointManager.CreateJson`, where the user-supplied options do **not** include the polymorphism resolver and `JsonMarshaller`'s `LookupTypeInfo` falls back to them for some type. | `JsonMarshaller.LookupTypeInfo` only goes to the external options when the internal chain doesn't know about the type. For most cases this won't trigger, but it's worth confirming that supplying a custom `JsonSerializerOptions` does not silently displace the internal chain. | Either the external options are never consulted for `AIContent` (good), or there is a sneak path where they are (regression). | +| A1 | ~~Use `ChatClientAgent` + `ApprovalRequiredAIFunction` bound into a `WorkflowBuilder`~~ — **covered by test #7 in this PR; passes.** | Was the largest gap to the OP's repro. Closed. | N/A — already passing. | +| A2 | Multi-agent variant: same as #7 but with the agent inside `GroupChatBuilder` (the OP's actual orchestration), to rule out a group-chat-specific re-wrap or replay path that drops the TARC.ToolCall type. | If group chat re-encodes TARC as part of `ChatMessage.Contents` (`AIContent` polymorphism is two-deep through `ToolApprovalRequestContent`), one branch may resolve and the other not. | Test fails → root cause is in the group-chat message round-trip, not the marshaller. | +| A3 | Same as #7 but with the JSON `JsonElement` round-tripped through `string` + `JsonDocument.Parse` between commit and retrieve (i.e. emulating the SQL `nvarchar` hop in the OP's Dapper store). | The OP uses Dapper + SQL Server. If the column / driver round-trip preserves ordering, this should be identity-preserving — but if it reorders metadata properties, the `$type` discriminator can be moved out of first position, which then requires `AllowOutOfOrderMetadataProperties = true`. Note `JsonMarshaller` only propagates that one flag from the user's `customOptions`. | Test fails when ordering is permuted before deserialization. Confirms the storage layer is reordering metadata. | +| A4 | Same as #7 but with a non-default `JsonSerializerOptions` passed as `customOptions` to `CheckpointManager.CreateJson`, where the user-supplied options do **not** include the polymorphism resolver and `JsonMarshaller`'s `LookupTypeInfo` falls back to them for some type. | `JsonMarshaller.LookupTypeInfo` only goes to the external options when the internal chain doesn't know about the type. For most cases this won't trigger, but it's worth confirming that supplying a custom `JsonSerializerOptions` does not silently displace the internal chain. | Either the external options are never consulted for `AIContent` (good), or there is a sneak path where they are (regression). | ### Track B — Validate the wire format the OP actually persists @@ -130,8 +137,14 @@ The combination of: - the resolver chain in `WorkflowsJsonUtilities.CreateDefaultOptions()` already putting `AgentAbstractionsJsonUtilities.DefaultOptions.TypeInfoResolver` first (which itself puts `AIJsonUtilities.DefaultOptions.TypeInfoResolver` first), -- the wire format captured above showing `"$type": "functionCall"` is present, and -- the full `Run → checkpoint → Resume` test in this PR passing, +- the wire format captured above showing `"$type": "functionCall"` is present, +- the full `Run → checkpoint → Resume` test in this PR passing for a plain + `RequestPort` workflow, **and** +- the maximal `ChatClientAgent` + `FunctionInvokingChatClient` + + `ApprovalRequiredAIFunction` test in this PR also passing — including the + assertion that the wrapped `AIFunction` is actually invoked exactly once after + approval and that the workflow then receives the resulting + `FunctionResultContent` and produces a final assistant message, is sufficient to **disprove** the OP's stated hypothesis. Once Track A or Track B identifies the actual cause, the issue should be updated with a brief diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs index 8f61fca51a..331c3cc798 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/ToolApprovalRequestCheckpointReproTests.cs @@ -2,6 +2,9 @@ using System; using System.Collections.Generic; +using System.ComponentModel; +using System.Linq; +using System.Runtime.CompilerServices; using System.Text.Json; using System.Threading; using System.Threading.Tasks; @@ -253,4 +256,233 @@ public class ToolApprovalRequestCheckpointReproTests serialized.Should().NotBeNullOrEmpty(); serialized.Should().Contain(CallId, "the call id should be present in the serialized form"); } + + /// + /// Maximal end-to-end repro for issue #5350 using the same shape as the OP's reported + /// scenario (pattern "B" in the GroupChatToolApproval sample), but with a single + /// bound directly into a + /// (no orchestration), and with a real -wrapped + /// tool that the agent actually attempts to call. The test: + /// + /// builds a over a that + /// returns a on the first turn and a final assistant + /// text on the second turn (so converts the FCC + /// to a and surfaces it as a workflow + /// ), + /// persists checkpoints via the OP's exact path — + /// CheckpointManager.CreateJson(InMemoryJsonStore) + + /// InProcessExecutionEnvironment.WithCheckpointing(...).RunStreamingAsync(...) — + /// so every checkpoint is round-tripped through the same + + /// pipeline the OP's SQL-backed store uses, + /// validates that the first-run carries a + /// whose ToolCall is a + /// , + /// disposes the run and resumes from the last + /// checkpoint via , + /// validates the re-emitted still carries a + /// whose ToolCall is a + /// — this is the assertion the OP claims fails, + /// sends an approval response back into the resumed run and asserts the wrapped + /// function is actually invoked (counter increments) and the workflow completes with a + /// final assistant message. + /// + /// + [Fact] + public async Task Repro_5350_EndToEnd_ChatClientAgent_WithApprovalRequiredTool_JsonCheckpointResume_PreservesFunctionCallContentAndInvokesToolAsync() + { + // Arrange — counting tool wrapped for approval + int invocationCount = 0; + const string ToolName = "GetWeather"; + const string ToolResultText = "Sunny, 22°C"; + + AIFunction underlyingTool = AIFunctionFactory.Create( + ([Description("City to look up")] string city) => + { + Interlocked.Increment(ref invocationCount); + return ToolResultText; + }, + name: ToolName, + description: "Gets the weather for the given city"); + + ApprovalRequiredAIFunction approvalTool = new(underlyingTool); + + // Arrange — mock chat client that turn-1 emits an FCC for the approval-required tool, + // turn-2 (after FunctionInvokingChatClient processes the approval + invokes the tool + + // appends a FunctionResultContent) emits a final assistant text. + const string ToolCallId = "call-1"; + const string FinalAssistantText = "The weather in Amsterdam is sunny and 22°C."; + int chatCallIndex = 0; + List> capturedInputs = new(); + + MockChatClient mockChatClient = new((messages, options) => + { + // Capture a snapshot of the inputs the agent passed in for this service call so the + // test can later assert the FunctionResultContent flowed back to the model. + capturedInputs.Add(new List(messages)); + + int index = Interlocked.Increment(ref chatCallIndex) - 1; + return index switch + { + 0 => new ChatResponse(new ChatMessage(ChatRole.Assistant, + [new FunctionCallContent( + callId: ToolCallId, + name: ToolName, + arguments: new Dictionary { ["city"] = "Amsterdam" })])), + _ => new ChatResponse(new ChatMessage(ChatRole.Assistant, FinalAssistantText)), + }; + }); + + ChatClientAgent agent = new( + mockChatClient, + instructions: "You are a weather agent.", + name: "WeatherAgent", + tools: [approvalTool]); + + // Arrange — single-agent workflow. The AIAgent is auto-promoted to an ExecutorBinding + // via the implicit operator on ExecutorBinding. + Workflow workflow = new WorkflowBuilder(agent).Build(); + + // Arrange — JSON checkpoint manager backed by an in-memory JSON store. This mirrors + // the OP's "JsonCheckpointStore-backed CheckpointManager.CreateJson(...)" path — + // every checkpoint is round-tripped through the same JsonMarshaller + + // PortableValueConverter that the OP's SQL-backed store uses, just without the + // disk/SQL hop. + CheckpointManager checkpointManager = CheckpointManager.CreateJson(new InMemoryJsonStore()); + + InProcessExecutionEnvironment env = InProcessExecution.OffThread; + List inputMessages = [new(ChatRole.User, "What's the weather in Amsterdam?")]; + + // Act 1 — run until we see the approval request, then capture the latest checkpoint. + ExternalRequest? firstRunRequest = null; + CheckpointInfo? checkpoint = null; + + await using (StreamingRun firstRun = await env.WithCheckpointing(checkpointManager) + .RunStreamingAsync(workflow, inputMessages)) + { + // Trigger an actual turn — without a TurnToken the AIAgentHostExecutor will not + // invoke the agent. This matches the GroupChatToolApproval sample and the + // StreamAsyncWithTurnTokenShouldExecuteWorkflow pattern in InProcessExecutionTests. + (await firstRun.TrySendMessageAsync(new TurnToken(emitEvents: false))) + .Should().BeTrue("the workflow should accept a TurnToken"); + + using CancellationTokenSource cts = new(TimeSpan.FromSeconds(30)); + await foreach (WorkflowEvent evt in firstRun.WatchStreamAsync(blockOnPendingRequest: false, cts.Token)) + { + if (evt is RequestInfoEvent requestInfo) + { + firstRunRequest ??= requestInfo.Request; + } + + if (evt is SuperStepCompletedEvent step && step.CompletionInfo?.Checkpoint is { } cp) + { + checkpoint = cp; + } + } + } + + firstRunRequest.Should().NotBeNull( + "the ChatClientAgent + FICC pipeline should have surfaced the approval request as a workflow RequestInfoEvent"); + checkpoint.Should().NotBeNull( + "a checkpoint should have been produced while the approval request was pending"); + chatCallIndex.Should().Be(1, "the mock chat client should have been called exactly once before the approval was requested"); + invocationCount.Should().Be(0, "the underlying tool must NOT have been invoked before the approval was granted"); + + ToolApprovalRequestContent? preCheckpoint = firstRunRequest!.Data.As(); + preCheckpoint.Should().NotBeNull("the pending external request should carry a ToolApprovalRequestContent payload"); + preCheckpoint!.ToolCall.Should().BeOfType( + "the pre-checkpoint pending request payload must already be a FunctionCallContent"); + + // Act 2 — resume from the checkpoint with a brand-new env / handle so that any + // in-process AIAgentHostExecutor instance state is gone and everything has to be + // rehydrated from the on-disk JSON. + ExternalRequest? resumedRequest = null; + List postResumeEvents = []; + + await using (StreamingRun resumed = await env.WithCheckpointing(checkpointManager) + .ResumeStreamingAsync(workflow, checkpoint!)) + { + using CancellationTokenSource cts = new(TimeSpan.FromSeconds(30)); + + // First pass: see the re-emitted RequestInfoEvent, but don't block on it. + await foreach (WorkflowEvent evt in resumed.WatchStreamAsync(blockOnPendingRequest: false, cts.Token)) + { + if (evt is RequestInfoEvent requestInfo) + { + resumedRequest ??= requestInfo.Request; + } + } + + resumedRequest.Should().NotBeNull( + "the resumed workflow should re-emit the pending approval RequestInfoEvent"); + + // The core issue #5350 assertion. + ToolApprovalRequestContent? postResume = resumedRequest!.Data.As(); + postResume.Should().NotBeNull( + "ExternalRequest.Data.As() should materialize the payload after a JSON-file checkpoint resume"); + postResume!.ToolCall.Should().NotBeNull("the resumed TARC must carry its ToolCall"); + postResume.ToolCall.Should().BeOfType( + "after CheckpointManager.CreateJson(InMemoryJsonStore) round-trip via " + + "ResumeStreamingAsync, ToolApprovalRequestContent.ToolCall must still be a " + + "FunctionCallContent so that FunctionInvokingChatClient's pattern match " + + "(`tarc.ToolCall is FunctionCallContent`) continues to fire (issue #5350)."); + + FunctionCallContent resumedFcc = (FunctionCallContent)postResume.ToolCall; + resumedFcc.Name.Should().Be(ToolName); + resumedFcc.CallId.Should().EndWith(ToolCallId, + "the workflow rewrites the CallId with an executor-scoped prefix, but should preserve the original tail"); + + // Act 3 — send the approval response back into the resumed run and watch the + // remaining stream. This drives FunctionInvokingChatClient through its + // post-approval branch, where it must invoke the underlying AIFunction, append a + // FunctionResultContent, and call the model a second time. + ToolApprovalResponseContent approvalResponse = postResume.CreateResponse(approved: true); + await resumed.SendResponseAsync(resumedRequest.CreateResponse(approvalResponse)); + + using CancellationTokenSource cts2 = new(TimeSpan.FromSeconds(30)); + await foreach (WorkflowEvent evt in resumed.WatchStreamAsync(blockOnPendingRequest: false, cts2.Token)) + { + postResumeEvents.Add(evt); + } + } + + // Assert 3 — the tool actually got called as part of the approval round-trip, and + // the workflow continued without raising errors. + invocationCount.Should().Be(1, + "approving the request should cause FunctionInvokingChatClient to invoke the wrapped AIFunction exactly once"); + chatCallIndex.Should().Be(2, + "after the tool was invoked, FunctionInvokingChatClient should have made a second chat-client call to produce the final assistant message"); + capturedInputs.Should().HaveCount(2); + capturedInputs[1].Should().Contain( + m => m.Contents.OfType().Any(), + "the second chat-client call must include the FunctionResultContent produced by the approved tool invocation"); + + postResumeEvents.OfType().Should().BeEmpty( + "no workflow errors should be raised when responding to the resumed approval request"); + postResumeEvents.OfType().Should().BeEmpty( + "no executor failures should be raised when responding to the resumed approval request"); + } + + /// + /// Minimal stub for repro tests; delegates each call to a caller-supplied factory. + /// + private sealed class MockChatClient(Func, ChatOptions?, ChatResponse> responseFactory) : IChatClient + { + public Task GetResponseAsync(IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) + => Task.FromResult(responseFactory(messages, options)); + + public async IAsyncEnumerable GetStreamingResponseAsync( + IEnumerable messages, + ChatOptions? options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + ChatResponse response = await this.GetResponseAsync(messages, options, cancellationToken).ConfigureAwait(false); + foreach (ChatResponseUpdate update in response.ToChatResponseUpdates()) + { + yield return update; + } + } + + public object? GetService(Type serviceType, object? serviceKey = null) => null; + public void Dispose() { } + } }