mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: HarnessAgent: Disable compaction when max tokens not provided (#6410)
* HarnessAgent: Disable compaction when max tokens not provided * Fix regression. * Address PR comments * Require max_output_tokens to be positive Reject max_output_tokens=0 (must be positive), mirroring max_context_window_tokens. Addresses PR review feedback. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
93cbf6b3f0
commit
8dde9ef627
@@ -66,23 +66,45 @@ def _assemble_instructions(
|
||||
def _assemble_compaction_provider(
|
||||
*,
|
||||
disable_compaction: bool,
|
||||
max_context_window_tokens: int,
|
||||
max_output_tokens: int,
|
||||
max_context_window_tokens: int | None,
|
||||
max_output_tokens: int | None,
|
||||
history_source_id: str,
|
||||
before_compaction_strategy: CompactionStrategy | None,
|
||||
after_compaction_strategy: CompactionStrategy | None,
|
||||
tokenizer: TokenizerProtocol | None,
|
||||
) -> CompactionProvider | None:
|
||||
"""Build the compaction provider from parameters or defaults."""
|
||||
"""Build the compaction provider from parameters or defaults.
|
||||
|
||||
The token-budget defaults (``ContextWindowCompactionStrategy`` for the before phase and
|
||||
``ToolResultCompactionStrategy`` for the after phase) are only applied when the token
|
||||
params are provided. Caller-supplied strategies are always honored. Either phase may end
|
||||
up ``None``, which ``CompactionProvider`` interprets as "skip that phase".
|
||||
|
||||
Returns None when compaction is explicitly disabled, or when neither phase has a strategy
|
||||
(no custom strategies and no token budget to build the defaults).
|
||||
"""
|
||||
if disable_compaction:
|
||||
return None
|
||||
|
||||
before_strategy = before_compaction_strategy or ContextWindowCompactionStrategy(
|
||||
max_context_window_tokens=max_context_window_tokens,
|
||||
max_output_tokens=max_output_tokens,
|
||||
tokenizer=tokenizer,
|
||||
)
|
||||
after_strategy = after_compaction_strategy or ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
|
||||
# Resolve the before-strategy: custom strategy wins; otherwise fall back to the
|
||||
# token-budget-aware default when token params are available.
|
||||
before_strategy = before_compaction_strategy
|
||||
if before_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
|
||||
before_strategy = ContextWindowCompactionStrategy(
|
||||
max_context_window_tokens=max_context_window_tokens,
|
||||
max_output_tokens=max_output_tokens,
|
||||
tokenizer=tokenizer,
|
||||
)
|
||||
|
||||
# Resolve the after-strategy: custom strategy wins; otherwise fall back to the default
|
||||
# when token params are available.
|
||||
after_strategy = after_compaction_strategy
|
||||
if after_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
|
||||
after_strategy = ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
|
||||
|
||||
# Nothing to compact in either phase: skip the provider entirely.
|
||||
if before_strategy is None and after_strategy is None:
|
||||
return None
|
||||
|
||||
return CompactionProvider(
|
||||
before_strategy=before_strategy,
|
||||
@@ -157,8 +179,8 @@ def create_harness_agent(
|
||||
harness_instructions: str | None = None,
|
||||
agent_instructions: str | None = None,
|
||||
tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None = None,
|
||||
max_context_window_tokens: int,
|
||||
max_output_tokens: int,
|
||||
max_context_window_tokens: int | None = None,
|
||||
max_output_tokens: int | None = None,
|
||||
history_provider: HistoryProvider | None = None,
|
||||
disable_compaction: bool = False,
|
||||
before_compaction_strategy: CompactionStrategy | None = None,
|
||||
@@ -206,8 +228,6 @@ def create_harness_agent(
|
||||
|
||||
agent = create_harness_agent(
|
||||
OpenAIChatClient(model="gpt-4o"),
|
||||
max_context_window_tokens=128_000,
|
||||
max_output_tokens=16_384,
|
||||
)
|
||||
session = agent.create_session()
|
||||
response = await agent.run("Plan a weekend trip to Seattle", session=session)
|
||||
@@ -243,13 +263,21 @@ def create_harness_agent(
|
||||
(e.g., "You are a research assistant focused on academic sources.").
|
||||
tools: Additional tools to include in the agent's toolset.
|
||||
max_context_window_tokens: Maximum tokens the model's context window supports.
|
||||
Used to construct the default token-budget-aware compaction strategies. When None
|
||||
(default) and no custom ``before_compaction_strategy`` / ``after_compaction_strategy``
|
||||
is provided, compaction is automatically disabled.
|
||||
max_output_tokens: Maximum output tokens per response.
|
||||
Used to construct the default compaction strategies and sets a default max_tokens
|
||||
chat option. When None (default), no default max_tokens option is set, and unless a
|
||||
custom compaction strategy is provided, compaction is automatically disabled.
|
||||
history_provider: Custom history provider. When None, an InMemoryHistoryProvider is used.
|
||||
disable_compaction: When True, skip compaction provider setup.
|
||||
before_compaction_strategy: Custom before-run compaction strategy.
|
||||
Defaults to ContextWindowCompactionStrategy (token-budget aware).
|
||||
after_compaction_strategy: Custom after-run compaction strategy.
|
||||
Defaults to ToolResultCompactionStrategy.
|
||||
before_compaction_strategy: Custom before-run compaction strategy. When provided,
|
||||
compaction runs even if token params are omitted. Defaults to
|
||||
ContextWindowCompactionStrategy (token-budget aware) when token params are provided.
|
||||
after_compaction_strategy: Custom after-run compaction strategy. When provided,
|
||||
compaction runs even if token params are omitted. Defaults to
|
||||
ToolResultCompactionStrategy when token params are provided.
|
||||
tokenizer: Custom tokenizer for compaction strategies.
|
||||
disable_todo: When True, skip the TodoProvider.
|
||||
todo_provider: Custom TodoProvider instance. Ignored when disable_todo is True.
|
||||
@@ -283,14 +311,19 @@ def create_harness_agent(
|
||||
A fully configured :class:`~agent_framework.Agent` instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If max_context_window_tokens <= 0 or max_output_tokens < 0
|
||||
or max_output_tokens >= max_context_window_tokens.
|
||||
ValueError: If max_context_window_tokens is provided and <= 0, or
|
||||
max_output_tokens is provided and <= 0, or max_output_tokens >=
|
||||
max_context_window_tokens when both are provided.
|
||||
"""
|
||||
if max_context_window_tokens <= 0:
|
||||
if max_context_window_tokens is not None and max_context_window_tokens <= 0:
|
||||
raise ValueError("max_context_window_tokens must be positive.")
|
||||
if max_output_tokens < 0:
|
||||
raise ValueError("max_output_tokens must be non-negative.")
|
||||
if max_output_tokens >= max_context_window_tokens:
|
||||
if max_output_tokens is not None and max_output_tokens <= 0:
|
||||
raise ValueError("max_output_tokens must be positive.")
|
||||
if (
|
||||
max_context_window_tokens is not None
|
||||
and max_output_tokens is not None
|
||||
and max_output_tokens >= max_context_window_tokens
|
||||
):
|
||||
raise ValueError("max_output_tokens must be less than max_context_window_tokens.")
|
||||
|
||||
# Build history provider.
|
||||
@@ -347,7 +380,8 @@ def create_harness_agent(
|
||||
|
||||
# Build default options dict.
|
||||
default_opts: dict[str, Any] = dict(default_options) if default_options else {}
|
||||
default_opts.setdefault("max_tokens", max_output_tokens)
|
||||
if max_output_tokens is not None:
|
||||
default_opts.setdefault("max_tokens", max_output_tokens)
|
||||
|
||||
agent = Agent(
|
||||
client,
|
||||
|
||||
@@ -194,6 +194,63 @@ def test_create_harness_agent_returns_full_agent() -> None:
|
||||
assert isinstance(agent, FullAgent)
|
||||
|
||||
|
||||
def test_create_harness_agent_no_token_params_disables_compaction() -> None:
|
||||
"""When token params are omitted, compaction is automatically disabled."""
|
||||
agent = create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
)
|
||||
provider_types = [type(p) for p in agent.context_providers]
|
||||
assert CompactionProvider not in provider_types
|
||||
|
||||
|
||||
def test_create_harness_agent_no_token_params_skips_max_tokens_option() -> None:
|
||||
"""When max_output_tokens is omitted, max_tokens should not be set in default options."""
|
||||
agent = create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
)
|
||||
assert agent.default_options.get("max_tokens") is None
|
||||
|
||||
|
||||
def test_create_harness_agent_custom_before_strategy_enables_compaction_without_tokens() -> None:
|
||||
"""A custom before_compaction_strategy enables compaction even when token params are omitted."""
|
||||
from agent_framework import ToolResultCompactionStrategy
|
||||
|
||||
agent = create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
before_compaction_strategy=ToolResultCompactionStrategy(),
|
||||
)
|
||||
provider_types = [type(p) for p in agent.context_providers]
|
||||
assert CompactionProvider in provider_types
|
||||
|
||||
|
||||
def test_create_harness_agent_disable_compaction_overrides_custom_before_strategy() -> None:
|
||||
"""disable_compaction=True wins even when a custom before strategy is provided."""
|
||||
from agent_framework import ToolResultCompactionStrategy
|
||||
|
||||
agent = create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
before_compaction_strategy=ToolResultCompactionStrategy(),
|
||||
disable_compaction=True,
|
||||
)
|
||||
provider_types = [type(p) for p in agent.context_providers]
|
||||
assert CompactionProvider not in provider_types
|
||||
|
||||
|
||||
def test_create_harness_agent_custom_after_strategy_enables_compaction_without_tokens() -> None:
|
||||
"""A custom after_compaction_strategy enables compaction even when token params are omitted."""
|
||||
from agent_framework import ToolResultCompactionStrategy
|
||||
|
||||
agent = create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
after_compaction_strategy=ToolResultCompactionStrategy(),
|
||||
)
|
||||
compaction_providers = [p for p in agent.context_providers if isinstance(p, CompactionProvider)]
|
||||
assert len(compaction_providers) == 1
|
||||
# Before phase is skipped (no token budget, no custom before strategy), after phase is set.
|
||||
assert compaction_providers[0].before_strategy is None
|
||||
assert compaction_providers[0].after_strategy is not None
|
||||
|
||||
|
||||
# --- Validation Tests ---
|
||||
|
||||
|
||||
@@ -207,14 +264,15 @@ def test_create_harness_agent_rejects_invalid_context_tokens() -> None:
|
||||
)
|
||||
|
||||
|
||||
def test_create_harness_agent_rejects_negative_output_tokens() -> None:
|
||||
"""max_output_tokens must be non-negative."""
|
||||
with pytest.raises(ValueError, match="max_output_tokens must be non-negative"):
|
||||
create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
max_context_window_tokens=1000,
|
||||
max_output_tokens=-1,
|
||||
)
|
||||
def test_create_harness_agent_rejects_non_positive_output_tokens() -> None:
|
||||
"""max_output_tokens must be positive when provided."""
|
||||
for invalid_value in (0, -1):
|
||||
with pytest.raises(ValueError, match="max_output_tokens must be positive"):
|
||||
create_harness_agent(
|
||||
client=_FakeChatClient(), # type: ignore[arg-type]
|
||||
max_context_window_tokens=1000,
|
||||
max_output_tokens=invalid_value,
|
||||
)
|
||||
|
||||
|
||||
def test_create_harness_agent_rejects_output_gte_context() -> None:
|
||||
|
||||
@@ -45,13 +45,23 @@ python samples/02-agents/harness/harness_research.py
|
||||
|
||||
### Minimal Setup
|
||||
|
||||
`create_harness_agent` requires only a chat client and token budget parameters:
|
||||
`create_harness_agent` requires only a chat client:
|
||||
|
||||
```python
|
||||
from agent_framework import create_harness_agent
|
||||
from agent_framework.foundry import FoundryChatClient
|
||||
from azure.identity import AzureCliCredential
|
||||
|
||||
agent = create_harness_agent(
|
||||
client=FoundryChatClient(credential=AzureCliCredential()),
|
||||
)
|
||||
```
|
||||
|
||||
### With Compaction
|
||||
|
||||
Provide token budget parameters to enable automatic context-window compaction:
|
||||
|
||||
```python
|
||||
agent = create_harness_agent(
|
||||
client=FoundryChatClient(credential=AzureCliCredential()),
|
||||
max_context_window_tokens=128_000,
|
||||
@@ -59,7 +69,7 @@ agent = create_harness_agent(
|
||||
)
|
||||
```
|
||||
|
||||
### Customization
|
||||
### Further Customization
|
||||
|
||||
Disable or customize any feature:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user