diff --git a/python/packages/core/agent_framework/_harness/_agent.py b/python/packages/core/agent_framework/_harness/_agent.py index 5896f72141..0ae0c73032 100644 --- a/python/packages/core/agent_framework/_harness/_agent.py +++ b/python/packages/core/agent_framework/_harness/_agent.py @@ -66,23 +66,45 @@ def _assemble_instructions( def _assemble_compaction_provider( *, disable_compaction: bool, - max_context_window_tokens: int, - max_output_tokens: int, + max_context_window_tokens: int | None, + max_output_tokens: int | None, history_source_id: str, before_compaction_strategy: CompactionStrategy | None, after_compaction_strategy: CompactionStrategy | None, tokenizer: TokenizerProtocol | None, ) -> CompactionProvider | None: - """Build the compaction provider from parameters or defaults.""" + """Build the compaction provider from parameters or defaults. + + The token-budget defaults (``ContextWindowCompactionStrategy`` for the before phase and + ``ToolResultCompactionStrategy`` for the after phase) are only applied when the token + params are provided. Caller-supplied strategies are always honored. Either phase may end + up ``None``, which ``CompactionProvider`` interprets as "skip that phase". + + Returns None when compaction is explicitly disabled, or when neither phase has a strategy + (no custom strategies and no token budget to build the defaults). + """ if disable_compaction: return None - before_strategy = before_compaction_strategy or ContextWindowCompactionStrategy( - max_context_window_tokens=max_context_window_tokens, - max_output_tokens=max_output_tokens, - tokenizer=tokenizer, - ) - after_strategy = after_compaction_strategy or ToolResultCompactionStrategy(keep_last_tool_call_groups=2) + # Resolve the before-strategy: custom strategy wins; otherwise fall back to the + # token-budget-aware default when token params are available. + before_strategy = before_compaction_strategy + if before_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None: + before_strategy = ContextWindowCompactionStrategy( + max_context_window_tokens=max_context_window_tokens, + max_output_tokens=max_output_tokens, + tokenizer=tokenizer, + ) + + # Resolve the after-strategy: custom strategy wins; otherwise fall back to the default + # when token params are available. + after_strategy = after_compaction_strategy + if after_strategy is None and max_context_window_tokens is not None and max_output_tokens is not None: + after_strategy = ToolResultCompactionStrategy(keep_last_tool_call_groups=2) + + # Nothing to compact in either phase: skip the provider entirely. + if before_strategy is None and after_strategy is None: + return None return CompactionProvider( before_strategy=before_strategy, @@ -157,8 +179,8 @@ def create_harness_agent( harness_instructions: str | None = None, agent_instructions: str | None = None, tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None = None, - max_context_window_tokens: int, - max_output_tokens: int, + max_context_window_tokens: int | None = None, + max_output_tokens: int | None = None, history_provider: HistoryProvider | None = None, disable_compaction: bool = False, before_compaction_strategy: CompactionStrategy | None = None, @@ -206,8 +228,6 @@ def create_harness_agent( agent = create_harness_agent( OpenAIChatClient(model="gpt-4o"), - max_context_window_tokens=128_000, - max_output_tokens=16_384, ) session = agent.create_session() response = await agent.run("Plan a weekend trip to Seattle", session=session) @@ -243,13 +263,21 @@ def create_harness_agent( (e.g., "You are a research assistant focused on academic sources."). tools: Additional tools to include in the agent's toolset. max_context_window_tokens: Maximum tokens the model's context window supports. + Used to construct the default token-budget-aware compaction strategies. When None + (default) and no custom ``before_compaction_strategy`` / ``after_compaction_strategy`` + is provided, compaction is automatically disabled. max_output_tokens: Maximum output tokens per response. + Used to construct the default compaction strategies and sets a default max_tokens + chat option. When None (default), no default max_tokens option is set, and unless a + custom compaction strategy is provided, compaction is automatically disabled. history_provider: Custom history provider. When None, an InMemoryHistoryProvider is used. disable_compaction: When True, skip compaction provider setup. - before_compaction_strategy: Custom before-run compaction strategy. - Defaults to ContextWindowCompactionStrategy (token-budget aware). - after_compaction_strategy: Custom after-run compaction strategy. - Defaults to ToolResultCompactionStrategy. + before_compaction_strategy: Custom before-run compaction strategy. When provided, + compaction runs even if token params are omitted. Defaults to + ContextWindowCompactionStrategy (token-budget aware) when token params are provided. + after_compaction_strategy: Custom after-run compaction strategy. When provided, + compaction runs even if token params are omitted. Defaults to + ToolResultCompactionStrategy when token params are provided. tokenizer: Custom tokenizer for compaction strategies. disable_todo: When True, skip the TodoProvider. todo_provider: Custom TodoProvider instance. Ignored when disable_todo is True. @@ -283,14 +311,19 @@ def create_harness_agent( A fully configured :class:`~agent_framework.Agent` instance. Raises: - ValueError: If max_context_window_tokens <= 0 or max_output_tokens < 0 - or max_output_tokens >= max_context_window_tokens. + ValueError: If max_context_window_tokens is provided and <= 0, or + max_output_tokens is provided and <= 0, or max_output_tokens >= + max_context_window_tokens when both are provided. """ - if max_context_window_tokens <= 0: + if max_context_window_tokens is not None and max_context_window_tokens <= 0: raise ValueError("max_context_window_tokens must be positive.") - if max_output_tokens < 0: - raise ValueError("max_output_tokens must be non-negative.") - if max_output_tokens >= max_context_window_tokens: + if max_output_tokens is not None and max_output_tokens <= 0: + raise ValueError("max_output_tokens must be positive.") + if ( + max_context_window_tokens is not None + and max_output_tokens is not None + and max_output_tokens >= max_context_window_tokens + ): raise ValueError("max_output_tokens must be less than max_context_window_tokens.") # Build history provider. @@ -347,7 +380,8 @@ def create_harness_agent( # Build default options dict. default_opts: dict[str, Any] = dict(default_options) if default_options else {} - default_opts.setdefault("max_tokens", max_output_tokens) + if max_output_tokens is not None: + default_opts.setdefault("max_tokens", max_output_tokens) agent = Agent( client, diff --git a/python/packages/core/tests/core/test_harness_agent.py b/python/packages/core/tests/core/test_harness_agent.py index 58ef3f5f2d..7da1bdbf36 100644 --- a/python/packages/core/tests/core/test_harness_agent.py +++ b/python/packages/core/tests/core/test_harness_agent.py @@ -194,6 +194,63 @@ def test_create_harness_agent_returns_full_agent() -> None: assert isinstance(agent, FullAgent) +def test_create_harness_agent_no_token_params_disables_compaction() -> None: + """When token params are omitted, compaction is automatically disabled.""" + agent = create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + ) + provider_types = [type(p) for p in agent.context_providers] + assert CompactionProvider not in provider_types + + +def test_create_harness_agent_no_token_params_skips_max_tokens_option() -> None: + """When max_output_tokens is omitted, max_tokens should not be set in default options.""" + agent = create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + ) + assert agent.default_options.get("max_tokens") is None + + +def test_create_harness_agent_custom_before_strategy_enables_compaction_without_tokens() -> None: + """A custom before_compaction_strategy enables compaction even when token params are omitted.""" + from agent_framework import ToolResultCompactionStrategy + + agent = create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + before_compaction_strategy=ToolResultCompactionStrategy(), + ) + provider_types = [type(p) for p in agent.context_providers] + assert CompactionProvider in provider_types + + +def test_create_harness_agent_disable_compaction_overrides_custom_before_strategy() -> None: + """disable_compaction=True wins even when a custom before strategy is provided.""" + from agent_framework import ToolResultCompactionStrategy + + agent = create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + before_compaction_strategy=ToolResultCompactionStrategy(), + disable_compaction=True, + ) + provider_types = [type(p) for p in agent.context_providers] + assert CompactionProvider not in provider_types + + +def test_create_harness_agent_custom_after_strategy_enables_compaction_without_tokens() -> None: + """A custom after_compaction_strategy enables compaction even when token params are omitted.""" + from agent_framework import ToolResultCompactionStrategy + + agent = create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + after_compaction_strategy=ToolResultCompactionStrategy(), + ) + compaction_providers = [p for p in agent.context_providers if isinstance(p, CompactionProvider)] + assert len(compaction_providers) == 1 + # Before phase is skipped (no token budget, no custom before strategy), after phase is set. + assert compaction_providers[0].before_strategy is None + assert compaction_providers[0].after_strategy is not None + + # --- Validation Tests --- @@ -207,14 +264,15 @@ def test_create_harness_agent_rejects_invalid_context_tokens() -> None: ) -def test_create_harness_agent_rejects_negative_output_tokens() -> None: - """max_output_tokens must be non-negative.""" - with pytest.raises(ValueError, match="max_output_tokens must be non-negative"): - create_harness_agent( - client=_FakeChatClient(), # type: ignore[arg-type] - max_context_window_tokens=1000, - max_output_tokens=-1, - ) +def test_create_harness_agent_rejects_non_positive_output_tokens() -> None: + """max_output_tokens must be positive when provided.""" + for invalid_value in (0, -1): + with pytest.raises(ValueError, match="max_output_tokens must be positive"): + create_harness_agent( + client=_FakeChatClient(), # type: ignore[arg-type] + max_context_window_tokens=1000, + max_output_tokens=invalid_value, + ) def test_create_harness_agent_rejects_output_gte_context() -> None: diff --git a/python/samples/02-agents/harness/README.md b/python/samples/02-agents/harness/README.md index 3bf0f09110..15424e1422 100644 --- a/python/samples/02-agents/harness/README.md +++ b/python/samples/02-agents/harness/README.md @@ -45,13 +45,23 @@ python samples/02-agents/harness/harness_research.py ### Minimal Setup -`create_harness_agent` requires only a chat client and token budget parameters: +`create_harness_agent` requires only a chat client: ```python from agent_framework import create_harness_agent from agent_framework.foundry import FoundryChatClient from azure.identity import AzureCliCredential +agent = create_harness_agent( + client=FoundryChatClient(credential=AzureCliCredential()), +) +``` + +### With Compaction + +Provide token budget parameters to enable automatic context-window compaction: + +```python agent = create_harness_agent( client=FoundryChatClient(credential=AzureCliCredential()), max_context_window_tokens=128_000, @@ -59,7 +69,7 @@ agent = create_harness_agent( ) ``` -### Customization +### Further Customization Disable or customize any feature: