mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
3e03a305f6
* Implement annotation-based context compaction Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Handle missing compaction attributes in BaseChatClient Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix CI typing and bandit issues Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Optimize incremental compaction annotation pass Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * refinement * Python: add ToolResultCompactionStrategy and CompactionProvider Add ToolResultCompactionStrategy that collapses older tool-call groups into short summary messages (e.g. [Tool calls: get_weather]) while keeping the most recent groups verbatim. This mirrors the .NET ToolResultCompactionStrategy from PR #4533. Add CompactionProvider as a context-provider that auto-applies compaction before each agent turn and stores compacted history in session state after each turn. Includes tests and samples for both features. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * refinement and alignment with dotnet PR * updated tool result compaction * updated tool result compaction * Python: add ToolResultCompactionStrategy, CompactionProvider, and skip_excluded - ToolResultCompactionStrategy collapses older tool-call groups into [Tool results: func_name: result] summaries with bidirectional tracing (same pattern as SummarizationStrategy). - CompactionProvider as BaseContextProvider with separate before_strategy and after_strategy parameters. before_strategy compacts loaded context; after_strategy compacts stored history via history_source_id. - InMemoryHistoryProvider gains skip_excluded flag to filter out messages marked as excluded by compaction strategies. - Tests, samples, and exports updated. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fixed checks * fix mypy * Fix: ensure summary messages from both strategies get full compaction annotations SummarizationStrategy was not calling annotate_message_groups after inserting its summary message, so the summary lacked core group annotations (id, kind, index, has_reasoning, _excluded). Added the missing call. ToolResultCompactionStrategy already had it. Added tests verifying both strategies produce fully annotated summaries. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * updated propagation * fix mypy --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
145 lines
4.7 KiB
Python
145 lines
4.7 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from collections.abc import Awaitable, Mapping, Sequence
|
|
from typing import Any
|
|
|
|
from agent_framework import (
|
|
GROUP_ANNOTATION_KEY,
|
|
GROUP_TOKEN_COUNT_KEY,
|
|
Agent,
|
|
BaseChatClient,
|
|
ChatResponse,
|
|
Message,
|
|
SlidingWindowStrategy,
|
|
TruncationStrategy,
|
|
)
|
|
|
|
"""This sample demonstrates client defaults, agent overrides, and run-level overrides for in-run compaction.
|
|
|
|
Key components:
|
|
- A shared client with default `compaction_strategy` and `tokenizer`
|
|
- An agent-level override that takes precedence over the shared client defaults
|
|
- A run-level override passed through `agent.run(...)`
|
|
"""
|
|
|
|
|
|
class FixedTokenizer:
|
|
"""Simple tokenizer used to make token annotations easy to inspect."""
|
|
|
|
def __init__(self, token_count: int) -> None:
|
|
self._token_count = token_count
|
|
|
|
def count_tokens(self, text: str) -> int:
|
|
return self._token_count
|
|
|
|
|
|
class InspectingChatClient(BaseChatClient[Any]):
|
|
"""Chat client that records the messages it receives after compaction."""
|
|
|
|
def __init__(self, **kwargs: Any) -> None:
|
|
super().__init__(**kwargs)
|
|
self.last_messages: list[Message] = []
|
|
|
|
def _inner_get_response(
|
|
self,
|
|
*,
|
|
messages: Sequence[Message],
|
|
stream: bool,
|
|
options: Mapping[str, Any],
|
|
**kwargs: Any,
|
|
) -> Awaitable[ChatResponse]:
|
|
if stream:
|
|
raise ValueError("This sample only demonstrates non-streaming responses.")
|
|
|
|
self.last_messages = list(messages)
|
|
|
|
async def _get_response() -> ChatResponse:
|
|
return ChatResponse(messages=[Message(role="assistant", text="done")])
|
|
|
|
return _get_response()
|
|
|
|
|
|
def _build_messages() -> list[Message]:
|
|
return [
|
|
Message(role="user", text="Collect the deployment requirements."),
|
|
Message(role="assistant", text="I will gather the constraints first."),
|
|
Message(role="user", text="Summarize the rollout risks."),
|
|
Message(role="assistant", text="The main risks are drift, downtime, and rollback gaps."),
|
|
]
|
|
|
|
|
|
def _token_count(message: Message) -> int | None:
|
|
group_annotation = message.additional_properties.get(GROUP_ANNOTATION_KEY)
|
|
if not isinstance(group_annotation, dict):
|
|
return None
|
|
value = group_annotation.get(GROUP_TOKEN_COUNT_KEY)
|
|
return value if isinstance(value, int) else None
|
|
|
|
|
|
def _print_model_input(title: str, client: InspectingChatClient) -> None:
|
|
print(f"\n{title}")
|
|
print(f"Model receives {len(client.last_messages)} message(s):")
|
|
for message in client.last_messages:
|
|
print(f"- [{message.role}] {message.text} ({_token_count(message)} tokens)")
|
|
|
|
|
|
async def main() -> None:
|
|
# 1. Create one shared client with default compaction settings.
|
|
shared_client = InspectingChatClient(
|
|
compaction_strategy=TruncationStrategy(max_n=3, compact_to=2),
|
|
tokenizer=FixedTokenizer(7),
|
|
)
|
|
|
|
# 2. Create one agent that relies on the client defaults.
|
|
client_default_agent = Agent(client=shared_client, name="ClientDefaultAgent")
|
|
|
|
# 3. Create another agent that overrides the shared client's defaults.
|
|
agent_override = Agent(
|
|
client=shared_client,
|
|
name="AgentOverrideAgent",
|
|
compaction_strategy=SlidingWindowStrategy(keep_last_groups=3),
|
|
tokenizer=FixedTokenizer(11),
|
|
)
|
|
|
|
# 4. Run the first agent; the client defaults are applied.
|
|
await client_default_agent.run(_build_messages())
|
|
_print_model_input("1. Client default compaction", shared_client)
|
|
|
|
# 5. Run the second agent; the agent-level override wins over the client defaults.
|
|
await agent_override.run(_build_messages())
|
|
_print_model_input("2. Agent-level override", shared_client)
|
|
|
|
# 6. Override both settings for a single run; the per-run values win over both.
|
|
await agent_override.run(
|
|
_build_messages(),
|
|
compaction_strategy=TruncationStrategy(max_n=2, compact_to=1),
|
|
tokenizer=FixedTokenizer(23),
|
|
)
|
|
_print_model_input("3. Per-run override", shared_client)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main())
|
|
|
|
"""
|
|
Sample output:
|
|
|
|
1. Client default compaction
|
|
Model receives 2 message(s):
|
|
- [user] Summarize the rollout risks. (7 tokens)
|
|
- [assistant] The main risks are drift, downtime, and rollback gaps. (7 tokens)
|
|
|
|
2. Agent-level override
|
|
Model receives 3 message(s):
|
|
- [assistant] I will gather the constraints first. (11 tokens)
|
|
- [user] Summarize the rollout risks. (11 tokens)
|
|
- [assistant] The main risks are drift, downtime, and rollback gaps. (11 tokens)
|
|
|
|
3. Per-run override
|
|
Model receives 1 message(s):
|
|
- [assistant] The main risks are drift, downtime, and rollback gaps. (23 tokens)
|
|
"""
|