Python: HarnessAgent: Disable compaction when max tokens not provided (#6410)

* HarnessAgent: Disable compaction when max tokens not provided

* Fix regression.

* Address PR comments

* Require max_output_tokens to be positive

Reject max_output_tokens=0 (must be positive), mirroring
max_context_window_tokens. Addresses PR review feedback.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
westey
2026-06-10 14:57:23 +01:00
committed by GitHub
Unverified
parent 93cbf6b3f0
commit 8dde9ef627
3 changed files with 136 additions and 34 deletions
@@ -66,23 +66,45 @@ def _assemble_instructions(
def _assemble_compaction_provider(
*,
disable_compaction: bool,
max_context_window_tokens: int,
max_output_tokens: int,
max_context_window_tokens: int | None,
max_output_tokens: int | None,
history_source_id: str,
before_compaction_strategy: CompactionStrategy | None,
after_compaction_strategy: CompactionStrategy | None,
tokenizer: TokenizerProtocol | None,
) -> CompactionProvider | None:
"""Build the compaction provider from parameters or defaults."""
"""Build the compaction provider from parameters or defaults.
The token-budget defaults (``ContextWindowCompactionStrategy`` for the before phase and
``ToolResultCompactionStrategy`` for the after phase) are only applied when the token
params are provided. Caller-supplied strategies are always honored. Either phase may end
up ``None``, which ``CompactionProvider`` interprets as "skip that phase".
Returns None when compaction is explicitly disabled, or when neither phase has a strategy
(no custom strategies and no token budget to build the defaults).
"""
if disable_compaction:
return None
before_strategy = before_compaction_strategy or ContextWindowCompactionStrategy(
max_context_window_tokens=max_context_window_tokens,
max_output_tokens=max_output_tokens,
tokenizer=tokenizer,
)
after_strategy = after_compaction_strategy or ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
# Resolve the before-strategy: custom strategy wins; otherwise fall back to the
# token-budget-aware default when token params are available.
before_strategy = before_compaction_strategy
if before_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
before_strategy = ContextWindowCompactionStrategy(
max_context_window_tokens=max_context_window_tokens,
max_output_tokens=max_output_tokens,
tokenizer=tokenizer,
)
# Resolve the after-strategy: custom strategy wins; otherwise fall back to the default
# when token params are available.
after_strategy = after_compaction_strategy
if after_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
after_strategy = ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
# Nothing to compact in either phase: skip the provider entirely.
if before_strategy is None and after_strategy is None:
return None
return CompactionProvider(
before_strategy=before_strategy,
@@ -157,8 +179,8 @@ def create_harness_agent(
harness_instructions: str | None = None,
agent_instructions: str | None = None,
tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None = None,
max_context_window_tokens: int,
max_output_tokens: int,
max_context_window_tokens: int | None = None,
max_output_tokens: int | None = None,
history_provider: HistoryProvider | None = None,
disable_compaction: bool = False,
before_compaction_strategy: CompactionStrategy | None = None,
@@ -206,8 +228,6 @@ def create_harness_agent(
agent = create_harness_agent(
OpenAIChatClient(model="gpt-4o"),
max_context_window_tokens=128_000,
max_output_tokens=16_384,
)
session = agent.create_session()
response = await agent.run("Plan a weekend trip to Seattle", session=session)
@@ -243,13 +263,21 @@ def create_harness_agent(
(e.g., "You are a research assistant focused on academic sources.").
tools: Additional tools to include in the agent's toolset.
max_context_window_tokens: Maximum tokens the model's context window supports.
Used to construct the default token-budget-aware compaction strategies. When None
(default) and no custom ``before_compaction_strategy`` / ``after_compaction_strategy``
is provided, compaction is automatically disabled.
max_output_tokens: Maximum output tokens per response.
Used to construct the default compaction strategies and sets a default max_tokens
chat option. When None (default), no default max_tokens option is set, and unless a
custom compaction strategy is provided, compaction is automatically disabled.
history_provider: Custom history provider. When None, an InMemoryHistoryProvider is used.
disable_compaction: When True, skip compaction provider setup.
before_compaction_strategy: Custom before-run compaction strategy.
Defaults to ContextWindowCompactionStrategy (token-budget aware).
after_compaction_strategy: Custom after-run compaction strategy.
Defaults to ToolResultCompactionStrategy.
before_compaction_strategy: Custom before-run compaction strategy. When provided,
compaction runs even if token params are omitted. Defaults to
ContextWindowCompactionStrategy (token-budget aware) when token params are provided.
after_compaction_strategy: Custom after-run compaction strategy. When provided,
compaction runs even if token params are omitted. Defaults to
ToolResultCompactionStrategy when token params are provided.
tokenizer: Custom tokenizer for compaction strategies.
disable_todo: When True, skip the TodoProvider.
todo_provider: Custom TodoProvider instance. Ignored when disable_todo is True.
@@ -283,14 +311,19 @@ def create_harness_agent(
A fully configured :class:`~agent_framework.Agent` instance.
Raises:
ValueError: If max_context_window_tokens <= 0 or max_output_tokens < 0
or max_output_tokens >= max_context_window_tokens.
ValueError: If max_context_window_tokens is provided and <= 0, or
max_output_tokens is provided and <= 0, or max_output_tokens >=
max_context_window_tokens when both are provided.
"""
if max_context_window_tokens <= 0:
if max_context_window_tokens is not None and max_context_window_tokens <= 0:
raise ValueError("max_context_window_tokens must be positive.")
if max_output_tokens < 0:
raise ValueError("max_output_tokens must be non-negative.")
if max_output_tokens >= max_context_window_tokens:
if max_output_tokens is not None and max_output_tokens <= 0:
raise ValueError("max_output_tokens must be positive.")
if (
max_context_window_tokens is not None
and max_output_tokens is not None
and max_output_tokens >= max_context_window_tokens
):
raise ValueError("max_output_tokens must be less than max_context_window_tokens.")
# Build history provider.
@@ -347,7 +380,8 @@ def create_harness_agent(
# Build default options dict.
default_opts: dict[str, Any] = dict(default_options) if default_options else {}
default_opts.setdefault("max_tokens", max_output_tokens)
if max_output_tokens is not None:
default_opts.setdefault("max_tokens", max_output_tokens)
agent = Agent(
client,
@@ -194,6 +194,63 @@ def test_create_harness_agent_returns_full_agent() -> None:
assert isinstance(agent, FullAgent)
def test_create_harness_agent_no_token_params_disables_compaction() -> None:
    """Omitting the token params leaves the agent without a CompactionProvider."""
    harness = create_harness_agent(
        client=_FakeChatClient(),  # type: ignore[arg-type]
    )
    # Neither a token budget nor a custom strategy was supplied, so no provider
    # of exactly type CompactionProvider may be registered.
    assert not any(type(provider) is CompactionProvider for provider in harness.context_providers)
def test_create_harness_agent_no_token_params_skips_max_tokens_option() -> None:
    """Without max_output_tokens, the default options yield no max_tokens value."""
    harness = create_harness_agent(
        client=_FakeChatClient(),  # type: ignore[arg-type]
    )
    max_tokens_option = harness.default_options.get("max_tokens")
    # NOTE(review): this also passes when the key exists with value None; if
    # default_options is a plain dict, asserting key absence would be stricter — confirm.
    assert max_tokens_option is None
def test_create_harness_agent_custom_before_strategy_enables_compaction_without_tokens() -> None:
    """Passing only a before_compaction_strategy still registers a CompactionProvider."""
    from agent_framework import ToolResultCompactionStrategy

    custom_before = ToolResultCompactionStrategy()
    harness = create_harness_agent(
        client=_FakeChatClient(),  # type: ignore[arg-type]
        before_compaction_strategy=custom_before,
    )
    # No token params are passed; the custom strategy alone must enable compaction.
    assert any(type(provider) is CompactionProvider for provider in harness.context_providers)
def test_create_harness_agent_disable_compaction_overrides_custom_before_strategy() -> None:
    """disable_compaction=True suppresses the provider even with a custom before strategy."""
    from agent_framework import ToolResultCompactionStrategy

    custom_before = ToolResultCompactionStrategy()
    harness = create_harness_agent(
        client=_FakeChatClient(),  # type: ignore[arg-type]
        before_compaction_strategy=custom_before,
        disable_compaction=True,
    )
    # The explicit disable flag takes precedence over the supplied strategy.
    assert not any(type(provider) is CompactionProvider for provider in harness.context_providers)
def test_create_harness_agent_custom_after_strategy_enables_compaction_without_tokens() -> None:
    """Passing only an after_compaction_strategy registers a provider with just the after phase."""
    from agent_framework import ToolResultCompactionStrategy

    custom_after = ToolResultCompactionStrategy()
    harness = create_harness_agent(
        client=_FakeChatClient(),  # type: ignore[arg-type]
        after_compaction_strategy=custom_after,
    )
    found = [
        provider
        for provider in harness.context_providers
        if isinstance(provider, CompactionProvider)
    ]
    assert len(found) == 1
    compaction = found[0]
    # With no token budget and no custom before strategy, only the after phase is populated.
    assert compaction.before_strategy is None
    assert compaction.after_strategy is not None
# --- Validation Tests ---
@@ -207,14 +264,15 @@ def test_create_harness_agent_rejects_invalid_context_tokens() -> None:
)
def test_create_harness_agent_rejects_negative_output_tokens() -> None:
    """A negative max_output_tokens is rejected with a ValueError."""
    expected_message = "max_output_tokens must be non-negative"
    with pytest.raises(ValueError, match=expected_message):
        create_harness_agent(
            client=_FakeChatClient(),  # type: ignore[arg-type]
            max_context_window_tokens=1000,
            max_output_tokens=-1,
        )
def test_create_harness_agent_rejects_non_positive_output_tokens() -> None:
    """Zero and negative max_output_tokens values are both rejected with a ValueError."""
    expected_message = "max_output_tokens must be positive"
    non_positive_values = (0, -1)
    for candidate in non_positive_values:
        # Each value gets its own raises-context so both are actually exercised.
        with pytest.raises(ValueError, match=expected_message):
            create_harness_agent(
                client=_FakeChatClient(),  # type: ignore[arg-type]
                max_context_window_tokens=1000,
                max_output_tokens=candidate,
            )
def test_create_harness_agent_rejects_output_gte_context() -> None:
+12 -2
View File
@@ -45,13 +45,23 @@ python samples/02-agents/harness/harness_research.py
### Minimal Setup
`create_harness_agent` requires only a chat client and token budget parameters:
`create_harness_agent` requires only a chat client:
```python
from agent_framework import create_harness_agent
from agent_framework.foundry import FoundryChatClient
from azure.identity import AzureCliCredential
agent = create_harness_agent(
client=FoundryChatClient(credential=AzureCliCredential()),
)
```
### With Compaction
Provide both token budget parameters (`max_context_window_tokens` and `max_output_tokens`) to enable automatic context-window compaction:
```python
agent = create_harness_agent(
client=FoundryChatClient(credential=AzureCliCredential()),
max_context_window_tokens=128_000,
@@ -59,7 +69,7 @@ agent = create_harness_agent(
)
```
### Customization
### Further Customization
Disable or customize any feature: