Python: HarnessAgent: Disable compaction when max tokens not provided (#6410)

* HarnessAgent: Disable compaction when max tokens not provided
* Fix regression.
* Address PR comments
* Require max_output_tokens to be positive

  Reject max_output_tokens=0 (must be positive), mirroring max_context_window_tokens. Addresses PR review feedback.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-16 21:04:09 +08:00 · 2026-06-10 14:57:23 +01:00
parent 93cbf6b3f0
commit 8dde9ef627
3 changed files with 136 additions and 34 deletions
@@ -66,23 +66,45 @@ def _assemble_instructions(
 def _assemble_compaction_provider(
    *,
    disable_compaction: bool,
-    max_context_window_tokens: int,
-    max_output_tokens: int,
+    max_context_window_tokens: int | None,
+    max_output_tokens: int | None,
    history_source_id: str,
    before_compaction_strategy: CompactionStrategy | None,
    after_compaction_strategy: CompactionStrategy | None,
    tokenizer: TokenizerProtocol | None,
 ) -> CompactionProvider | None:
-    """Build the compaction provider from parameters or defaults."""
+    """Build the compaction provider from parameters or defaults.
+
+    The token-budget defaults (``ContextWindowCompactionStrategy`` for the before phase and
+    ``ToolResultCompactionStrategy`` for the after phase) are only applied when the token
+    params are provided. Caller-supplied strategies are always honored. Either phase may end
+    up ``None``, which ``CompactionProvider`` interprets as "skip that phase".
+
+    Returns None when compaction is explicitly disabled, or when neither phase has a strategy
+    (no custom strategies and no token budget to build the defaults).
+    """
    if disable_compaction:
        return None

-    before_strategy = before_compaction_strategy or ContextWindowCompactionStrategy(
-        max_context_window_tokens=max_context_window_tokens,
-        max_output_tokens=max_output_tokens,
-        tokenizer=tokenizer,
-    )
-    after_strategy = after_compaction_strategy or ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
+    # Resolve the before-strategy: custom strategy wins; otherwise fall back to the
+    # token-budget-aware default when token params are available.
+    before_strategy = before_compaction_strategy
+    if before_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
+        before_strategy = ContextWindowCompactionStrategy(
+            max_context_window_tokens=max_context_window_tokens,
+            max_output_tokens=max_output_tokens,
+            tokenizer=tokenizer,
+        )
+
+    # Resolve the after-strategy: custom strategy wins; otherwise fall back to the default
+    # when token params are available.
+    after_strategy = after_compaction_strategy
+    if after_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None:
+        after_strategy = ToolResultCompactionStrategy(keep_last_tool_call_groups=2)
+
+    # Nothing to compact in either phase: skip the provider entirely.
+    if before_strategy is None and after_strategy is None:
+        return None

    return CompactionProvider(
        before_strategy=before_strategy,
@@ -157,8 +179,8 @@ def create_harness_agent(
    harness_instructions: str | None = None,
    agent_instructions: str | None = None,
    tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None = None,
-    max_context_window_tokens: int,
-    max_output_tokens: int,
+    max_context_window_tokens: int | None = None,
+    max_output_tokens: int | None = None,
    history_provider: HistoryProvider | None = None,
    disable_compaction: bool = False,
    before_compaction_strategy: CompactionStrategy | None = None,
@@ -206,8 +228,6 @@ def create_harness_agent(

            agent = create_harness_agent(
                OpenAIChatClient(model="gpt-4o"),
-                max_context_window_tokens=128_000,
-                max_output_tokens=16_384,
            )
            session = agent.create_session()
            response = await agent.run("Plan a weekend trip to Seattle", session=session)
@@ -243,13 +263,21 @@ def create_harness_agent(
            (e.g., "You are a research assistant focused on academic sources.").
        tools: Additional tools to include in the agent's toolset.
        max_context_window_tokens: Maximum tokens the model's context window supports.
+            Used to construct the default token-budget-aware compaction strategies. When None
+            (default) and no custom ``before_compaction_strategy`` / ``after_compaction_strategy``
+            is provided, compaction is automatically disabled.
        max_output_tokens: Maximum output tokens per response.
+            Used to construct the default compaction strategies and sets a default max_tokens
+            chat option. When None (default), no default max_tokens option is set, and unless a
+            custom compaction strategy is provided, compaction is automatically disabled.
        history_provider: Custom history provider. When None, an InMemoryHistoryProvider is used.
        disable_compaction: When True, skip compaction provider setup.
-        before_compaction_strategy: Custom before-run compaction strategy.
-            Defaults to ContextWindowCompactionStrategy (token-budget aware).
-        after_compaction_strategy: Custom after-run compaction strategy.
-            Defaults to ToolResultCompactionStrategy.
+        before_compaction_strategy: Custom before-run compaction strategy. When provided,
+            compaction runs even if token params are omitted. Defaults to
+            ContextWindowCompactionStrategy (token-budget aware) when token params are provided.
+        after_compaction_strategy: Custom after-run compaction strategy. When provided,
+            compaction runs even if token params are omitted. Defaults to
+            ToolResultCompactionStrategy when token params are provided.
        tokenizer: Custom tokenizer for compaction strategies.
        disable_todo: When True, skip the TodoProvider.
        todo_provider: Custom TodoProvider instance. Ignored when disable_todo is True.
@@ -283,14 +311,19 @@ def create_harness_agent(
        A fully configured :class:`~agent_framework.Agent` instance.

    Raises:
-        ValueError: If max_context_window_tokens <= 0 or max_output_tokens < 0
-            or max_output_tokens >= max_context_window_tokens.
+        ValueError: If max_context_window_tokens is provided and <= 0, or
+            max_output_tokens is provided and <= 0, or max_output_tokens >=
+            max_context_window_tokens when both are provided.
    """
-    if max_context_window_tokens <= 0:
+    if max_context_window_tokens is not None and max_context_window_tokens <= 0:
        raise ValueError("max_context_window_tokens must be positive.")
-    if max_output_tokens < 0:
-        raise ValueError("max_output_tokens must be non-negative.")
-    if max_output_tokens >= max_context_window_tokens:
+    if max_output_tokens is not None and max_output_tokens <= 0:
+        raise ValueError("max_output_tokens must be positive.")
+    if (
+        max_context_window_tokens is not None
+        and max_output_tokens is not None
+        and max_output_tokens >= max_context_window_tokens
+    ):
        raise ValueError("max_output_tokens must be less than max_context_window_tokens.")

    # Build history provider.
@@ -347,7 +380,8 @@ def create_harness_agent(

    # Build default options dict.
    default_opts: dict[str, Any] = dict(default_options) if default_options else {}
-    default_opts.setdefault("max_tokens", max_output_tokens)
+    if max_output_tokens is not None:
+        default_opts.setdefault("max_tokens", max_output_tokens)

    agent = Agent(
        client,
@@ -194,6 +194,63 @@ def test_create_harness_agent_returns_full_agent() -> None:
    assert isinstance(agent, FullAgent)


+def test_create_harness_agent_no_token_params_disables_compaction() -> None:
+    """When token params are omitted, compaction is automatically disabled."""
+    agent = create_harness_agent(
+        client=_FakeChatClient(),  # type: ignore[arg-type]
+    )
+    provider_types = [type(p) for p in agent.context_providers]
+    assert CompactionProvider not in provider_types
+
+
+def test_create_harness_agent_no_token_params_skips_max_tokens_option() -> None:
+    """When max_output_tokens is omitted, max_tokens should not be set in default options."""
+    agent = create_harness_agent(
+        client=_FakeChatClient(),  # type: ignore[arg-type]
+    )
+    assert agent.default_options.get("max_tokens") is None
+
+
+def test_create_harness_agent_custom_before_strategy_enables_compaction_without_tokens() -> None:
+    """A custom before_compaction_strategy enables compaction even when token params are omitted."""
+    from agent_framework import ToolResultCompactionStrategy
+
+    agent = create_harness_agent(
+        client=_FakeChatClient(),  # type: ignore[arg-type]
+        before_compaction_strategy=ToolResultCompactionStrategy(),
+    )
+    provider_types = [type(p) for p in agent.context_providers]
+    assert CompactionProvider in provider_types
+
+
+def test_create_harness_agent_disable_compaction_overrides_custom_before_strategy() -> None:
+    """disable_compaction=True wins even when a custom before strategy is provided."""
+    from agent_framework import ToolResultCompactionStrategy
+
+    agent = create_harness_agent(
+        client=_FakeChatClient(),  # type: ignore[arg-type]
+        before_compaction_strategy=ToolResultCompactionStrategy(),
+        disable_compaction=True,
+    )
+    provider_types = [type(p) for p in agent.context_providers]
+    assert CompactionProvider not in provider_types
+
+
+def test_create_harness_agent_custom_after_strategy_enables_compaction_without_tokens() -> None:
+    """A custom after_compaction_strategy enables compaction even when token params are omitted."""
+    from agent_framework import ToolResultCompactionStrategy
+
+    agent = create_harness_agent(
+        client=_FakeChatClient(),  # type: ignore[arg-type]
+        after_compaction_strategy=ToolResultCompactionStrategy(),
+    )
+    compaction_providers = [p for p in agent.context_providers if isinstance(p, CompactionProvider)]
+    assert len(compaction_providers) == 1
+    # Before phase is skipped (no token budget, no custom before strategy), after phase is set.
+    assert compaction_providers[0].before_strategy is None
+    assert compaction_providers[0].after_strategy is not None
+
+
 # --- Validation Tests ---


@@ -207,14 +264,15 @@ def test_create_harness_agent_rejects_invalid_context_tokens() -> None:
        )


-def test_create_harness_agent_rejects_negative_output_tokens() -> None:
-    """max_output_tokens must be non-negative."""
-    with pytest.raises(ValueError, match="max_output_tokens must be non-negative"):
-        create_harness_agent(
-            client=_FakeChatClient(),  # type: ignore[arg-type]
-            max_context_window_tokens=1000,
-            max_output_tokens=-1,
-        )
+def test_create_harness_agent_rejects_non_positive_output_tokens() -> None:
+    """max_output_tokens must be positive when provided."""
+    for invalid_value in (0, -1):
+        with pytest.raises(ValueError, match="max_output_tokens must be positive"):
+            create_harness_agent(
+                client=_FakeChatClient(),  # type: ignore[arg-type]
+                max_context_window_tokens=1000,
+                max_output_tokens=invalid_value,
+            )


 def test_create_harness_agent_rejects_output_gte_context() -> None:
@@ -45,13 +45,23 @@ python samples/02-agents/harness/harness_research.py

 ### Minimal Setup

-`create_harness_agent` requires only a chat client and token budget parameters:
+`create_harness_agent` requires only a chat client:

 ```python
 from agent_framework import create_harness_agent
 from agent_framework.foundry import FoundryChatClient
 from azure.identity import AzureCliCredential

+agent = create_harness_agent(
+    client=FoundryChatClient(credential=AzureCliCredential()),
+)
+```
+
+### With Compaction
+
+Provide token budget parameters to enable automatic context-window compaction:
+
+```python
 agent = create_harness_agent(
    client=FoundryChatClient(credential=AzureCliCredential()),
    max_context_window_tokens=128_000,
@@ -59,7 +69,7 @@ agent = create_harness_agent(
 )
 ```

-### Customization
+### Further Customization

 Disable or customize any feature: