# agent-framework/python/samples/02-agents/compaction/agent_client_overrides.py

# Copyright (c) Microsoft. All rights reserved.

from __future__ import annotations

import asyncio
from collections.abc import Awaitable, Mapping, Sequence
from typing import Any

from agent_framework import (
    GROUP_ANNOTATION_KEY,
    GROUP_TOKEN_COUNT_KEY,
    Agent,
    BaseChatClient,
    ChatResponse,
    Message,
    SlidingWindowStrategy,
    TruncationStrategy,
)

"""This sample demonstrates client defaults, agent overrides, and run-level overrides for in-run compaction.

Key components:
- A shared client with default `compaction_strategy` and `tokenizer`
- An agent-level override that takes precedence over the shared client defaults
- A run-level override passed through `agent.run(...)` that takes precedence over both agent and client settings
"""


class FixedTokenizer:
    """Tokenizer stub that reports the same count for every input.

    Since the result never depends on the text, the token annotations printed
    by this sample can be traced back to the tokenizer instance that was used.
    """

    def __init__(self, token_count: int) -> None:
        # Only state: the value handed back by every count_tokens() call.
        self._constant = token_count

    def count_tokens(self, text: str) -> int:
        """Return the configured count; ``text`` is intentionally ignored."""
        return self._constant


class InspectingChatClient(BaseChatClient[Any]):
    """Chat client stub that remembers the messages handed to it.

    No model is called. The messages passed to ``_inner_get_response`` are
    copied into ``last_messages`` so the sample can print what the model would
    have received after compaction, and a canned assistant reply is returned.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        # Overwritten on every non-streaming call; empty until the first call.
        self.last_messages: list[Message] = []

    def _inner_get_response(
        self,
        *,
        messages: Sequence[Message],
        stream: bool,
        options: Mapping[str, Any],
        **kwargs: Any,
    ) -> Awaitable[ChatResponse]:
        """Record ``messages`` and return an awaitable yielding a fixed reply.

        Args:
            messages: The messages to record.
            stream: Must be falsy; streaming is not demonstrated here.
            options: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Raises:
            ValueError: If ``stream`` is truthy.
        """
        if stream:
            raise ValueError("This sample only demonstrates non-streaming responses.")

        # Snapshot at call time (not when the awaitable is awaited), as a new list.
        self.last_messages = [*messages]

        async def _canned_reply() -> ChatResponse:
            reply = Message(role="assistant", contents=["done"])
            return ChatResponse(messages=[reply])

        return _canned_reply()


def _build_messages() -> list[Message]:
    """Return a fresh four-message conversation alternating user and assistant."""
    # (role, text) pairs in conversation order; each call builds new Message objects.
    turns = (
        ("user", "Collect the deployment requirements."),
        ("assistant", "I will gather the constraints first."),
        ("user", "Summarize the rollout risks."),
        ("assistant", "The main risks are drift, downtime, and rollback gaps."),
    )
    return [Message(role=speaker, contents=[line]) for speaker, line in turns]


def _token_count(message: Message) -> int | None:
    """Return the token count stored in the message's group annotation.

    Looks up ``GROUP_ANNOTATION_KEY`` in ``message.additional_properties`` and,
    when that value is a dict, reads ``GROUP_TOKEN_COUNT_KEY`` from it.

    Returns:
        The stored count when it is an ``int``; otherwise ``None`` (annotation
        missing, not a dict, or count missing / not an int).
    """
    annotation = message.additional_properties.get(GROUP_ANNOTATION_KEY)
    if isinstance(annotation, dict):
        count = annotation.get(GROUP_TOKEN_COUNT_KEY)
        if isinstance(count, int):
            return count
    return None


def _print_model_input(title: str, client: InspectingChatClient) -> None:
    """Print the messages most recently captured by ``client``.

    Output is a blank line, the title, a count line, and then one bullet per
    message showing its role, text, and annotated token count.

    Args:
        title: Heading printed above the message list.
        client: Client whose ``last_messages`` are printed.
    """
    captured = client.last_messages
    print(f"\n{title}")
    print(f"Model receives {len(captured)} message(s):")
    for entry in captured:
        role = entry.role
        text = entry.text
        tokens = _token_count(entry)  # None when no count annotation is present
        print(f"- [{role}] {text} ({tokens} tokens)")


async def main() -> None:
    """Run the same conversation under three compaction configurations.

    Each run goes through one shared ``InspectingChatClient``; after every run
    the messages that client captured are printed.
    """
    # 1. A single client instance carries the default strategy and tokenizer.
    client = InspectingChatClient(
        compaction_strategy=TruncationStrategy(max_n=3, compact_to=2),
        tokenizer=FixedTokenizer(7),
    )

    # 2. This agent configures nothing itself, so it relies on the client defaults.
    default_agent = Agent(client=client, name="ClientDefaultAgent")

    # 3. This agent shares the client but supplies its own strategy and tokenizer.
    overriding_agent = Agent(
        client=client,
        name="AgentOverrideAgent",
        compaction_strategy=SlidingWindowStrategy(keep_last_groups=3),
        tokenizer=FixedTokenizer(11),
    )

    # 4. First run: the client defaults are applied.
    await default_agent.run(_build_messages())
    _print_model_input("1. Client default compaction", client)

    # 5. Second run: the agent-level settings win over the client defaults.
    await overriding_agent.run(_build_messages())
    _print_model_input("2. Agent-level override", client)

    # 6. Third run: settings passed to run() win over both agent and client.
    conversation = _build_messages()
    run_strategy = TruncationStrategy(max_n=2, compact_to=1)
    run_tokenizer = FixedTokenizer(23)
    await overriding_agent.run(
        conversation,
        compaction_strategy=run_strategy,
        tokenizer=run_tokenizer,
    )
    _print_model_input("3. Per-run override", client)


# Run the sample only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())

"""
Sample output:

1. Client default compaction
Model receives 2 message(s):
- [user] Summarize the rollout risks. (7 tokens)
- [assistant] The main risks are drift, downtime, and rollback gaps. (7 tokens)

2. Agent-level override
Model receives 3 message(s):
- [assistant] I will gather the constraints first. (11 tokens)
- [user] Summarize the rollout risks. (11 tokens)
- [assistant] The main risks are drift, downtime, and rollback gaps. (11 tokens)

3. Per-run override
Model receives 1 message(s):
- [assistant] The main risks are drift, downtime, and rollback gaps. (23 tokens)
"""