mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
[codex] Add token budget context feature (#27438)
## Why The model should be able to see bounded context-window budget metadata when the `token_budget` feature is enabled. The full-window message is only injected with full context, while normal turns get a smaller follow-up only when reported usage first crosses a budget threshold. ## What changed - Added the `TokenBudget` feature flag. - Added `<token_budget>` developer fragments for full context-window metadata and current-window remaining tokens. - Inserted the threshold message during normal turn handling by comparing token usage before and after sampling, avoiding persistent threshold bookkeeping. - Added core integration coverage for full-context-only metadata and 25/50/75 percent threshold messages. ## Verification - `just test -p codex-core token_budget` - `git diff --check`
This commit is contained in:
committed by
GitHub
Unverified
parent
ab4ce40042
commit
658af936fd
@@ -624,6 +624,9 @@
|
||||
"terminal_visualization_instructions": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"token_budget": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"tool_call_mcp_elicitation": {
|
||||
"type": "boolean"
|
||||
},
|
||||
@@ -4759,6 +4762,9 @@
|
||||
"terminal_visualization_instructions": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"token_budget": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"tool_call_mcp_elicitation": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
@@ -294,6 +294,7 @@ async fn run_compact_task_inner_impl(
|
||||
let user_messages = collect_user_messages(history_items);
|
||||
|
||||
let mut new_history = build_compacted_history(Vec::new(), &user_messages, &summary_text);
|
||||
let window_id = sess.advance_auto_compact_window_id().await;
|
||||
|
||||
if matches!(
|
||||
initial_context_injection,
|
||||
@@ -310,7 +311,7 @@ async fn run_compact_task_inner_impl(
|
||||
let compacted_item = CompactedItem {
|
||||
message: summary_text.clone(),
|
||||
replacement_history: Some(new_history.clone()),
|
||||
window_id: None,
|
||||
window_id: Some(window_id),
|
||||
};
|
||||
sess.replace_compacted_history(new_history, reference_context_item, compacted_item)
|
||||
.await;
|
||||
|
||||
@@ -249,6 +249,7 @@ async fn run_remote_compact_task_inner_impl(
|
||||
turn_metadata_header.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
let new_window_id = sess.advance_auto_compact_window_id().await;
|
||||
new_history = process_compacted_history(
|
||||
sess.as_ref(),
|
||||
turn_context.as_ref(),
|
||||
@@ -264,7 +265,7 @@ async fn run_remote_compact_task_inner_impl(
|
||||
let compacted_item = CompactedItem {
|
||||
message: String::new(),
|
||||
replacement_history: Some(new_history.clone()),
|
||||
window_id: None,
|
||||
window_id: Some(new_window_id),
|
||||
};
|
||||
// Install is the semantic boundary where the compact endpoint's output becomes live
|
||||
// thread history. Keep it distinct from the later inference request so the reducer can
|
||||
|
||||
@@ -285,6 +285,7 @@ async fn run_remote_compact_task_inner_impl(
|
||||
let (compacted_history, retained_images) =
|
||||
build_v2_compacted_history(&prompt_input, compaction_output);
|
||||
analytics_details.retained_image_count = Some(retained_images);
|
||||
let new_window_id = sess.advance_auto_compact_window_id().await;
|
||||
let new_history = process_compacted_history(
|
||||
sess.as_ref(),
|
||||
turn_context.as_ref(),
|
||||
@@ -300,7 +301,7 @@ async fn run_remote_compact_task_inner_impl(
|
||||
let compacted_item = CompactedItem {
|
||||
message: String::new(),
|
||||
replacement_history: Some(new_history.clone()),
|
||||
window_id: None,
|
||||
window_id: Some(new_window_id),
|
||||
};
|
||||
compaction_trace.record_installed(&CompactionCheckpointTracePayload {
|
||||
input_history: &trace_input_history,
|
||||
|
||||
@@ -23,6 +23,7 @@ mod realtime_end_instructions;
|
||||
mod realtime_start_instructions;
|
||||
mod realtime_start_with_instructions;
|
||||
mod subagent_notification;
|
||||
mod token_budget_context;
|
||||
mod turn_aborted;
|
||||
mod user_instructions;
|
||||
mod user_shell_command;
|
||||
@@ -60,6 +61,8 @@ pub(crate) use realtime_end_instructions::RealtimeEndInstructions;
|
||||
pub(crate) use realtime_start_instructions::RealtimeStartInstructions;
|
||||
pub(crate) use realtime_start_with_instructions::RealtimeStartWithInstructions;
|
||||
pub(crate) use subagent_notification::SubagentNotification;
|
||||
pub(crate) use token_budget_context::TokenBudgetContext;
|
||||
pub(crate) use token_budget_context::TokenBudgetRemainingContext;
|
||||
pub(crate) use turn_aborted::TurnAborted;
|
||||
pub(crate) use user_instructions::UserInstructions;
|
||||
pub(crate) use user_shell_command::UserShellCommand;
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
use super::ContextualUserFragment;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct TokenBudgetContext {
|
||||
window_id: u64,
|
||||
tokens_left: i64,
|
||||
}
|
||||
|
||||
impl TokenBudgetContext {
|
||||
pub(crate) fn new(window_id: u64, tokens_left: i64) -> Self {
|
||||
Self {
|
||||
window_id,
|
||||
tokens_left,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ContextualUserFragment for TokenBudgetContext {
|
||||
fn role(&self) -> &'static str {
|
||||
"developer"
|
||||
}
|
||||
|
||||
fn markers(&self) -> (&'static str, &'static str) {
|
||||
Self::type_markers()
|
||||
}
|
||||
|
||||
fn type_markers() -> (&'static str, &'static str) {
|
||||
("<token_budget>\n", "\n</token_budget>")
|
||||
}
|
||||
|
||||
fn body(&self) -> String {
|
||||
let window_id = self.window_id;
|
||||
let tokens_left = self.tokens_left;
|
||||
format!(
|
||||
"Current context window {window_id}.\nYou have {tokens_left} tokens left in this context window."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct TokenBudgetRemainingContext {
|
||||
tokens_left: i64,
|
||||
}
|
||||
|
||||
impl TokenBudgetRemainingContext {
|
||||
pub(crate) fn new(tokens_left: i64) -> Self {
|
||||
Self { tokens_left }
|
||||
}
|
||||
}
|
||||
|
||||
impl ContextualUserFragment for TokenBudgetRemainingContext {
|
||||
fn role(&self) -> &'static str {
|
||||
"developer"
|
||||
}
|
||||
|
||||
fn markers(&self) -> (&'static str, &'static str) {
|
||||
Self::type_markers()
|
||||
}
|
||||
|
||||
fn type_markers() -> (&'static str, &'static str) {
|
||||
("<token_budget>\n", "\n</token_budget>")
|
||||
}
|
||||
|
||||
fn body(&self) -> String {
|
||||
let tokens_left = self.tokens_left;
|
||||
format!("You have {tokens_left} tokens left in this context window.")
|
||||
}
|
||||
}
|
||||
@@ -30,6 +30,7 @@ const CONTEXTUAL_DEVELOPER_PREFIXES: &[&str] = &[
|
||||
COLLABORATION_MODE_OPEN_TAG,
|
||||
REALTIME_CONVERSATION_OPEN_TAG,
|
||||
"<personality_spec>",
|
||||
"<token_budget>",
|
||||
];
|
||||
|
||||
pub(crate) fn is_contextual_user_message_content(message: &[ContentItem]) -> bool {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use super::has_non_contextual_dev_message_content;
|
||||
use super::is_contextual_dev_message_content;
|
||||
use super::parse_turn_item;
|
||||
use crate::context::ContextualUserFragment;
|
||||
use crate::context::InternalContextSource;
|
||||
@@ -16,6 +18,17 @@ use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn recognizes_token_budget_as_contextual_developer_content() {
|
||||
let content = vec![ContentItem::InputText {
|
||||
text: "<token_budget>\nYou have 710 tokens left in this context window.\n</token_budget>"
|
||||
.to_string(),
|
||||
}];
|
||||
|
||||
assert!(is_contextual_dev_message_content(&content));
|
||||
assert!(!has_non_contextual_dev_message_content(&content));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_user_message_with_text_and_two_images() {
|
||||
let img1 = "https://example.com/one.png".to_string();
|
||||
|
||||
@@ -207,6 +207,7 @@ mod review;
|
||||
mod rollout_reconstruction;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub(crate) mod session;
|
||||
mod token_budget;
|
||||
pub(crate) mod turn;
|
||||
pub(crate) mod turn_context;
|
||||
use self::config_lock::export_config_lock_if_configured;
|
||||
@@ -2712,15 +2713,13 @@ impl Session {
|
||||
&self,
|
||||
items: Vec<ResponseItem>,
|
||||
reference_context_item: Option<TurnContextItem>,
|
||||
mut compacted_item: CompactedItem,
|
||||
compacted_item: CompactedItem,
|
||||
) {
|
||||
{
|
||||
let mut state = self.state.lock().await;
|
||||
state.replace_history(items, reference_context_item.clone());
|
||||
}
|
||||
|
||||
compacted_item.window_id = Some(self.advance_auto_compact_window_id().await);
|
||||
|
||||
self.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
|
||||
.await;
|
||||
if let Some(turn_context_item) = reference_context_item {
|
||||
@@ -2805,6 +2804,7 @@ impl Session {
|
||||
collaboration_mode,
|
||||
base_instructions,
|
||||
session_source,
|
||||
auto_compact_window_id,
|
||||
) = {
|
||||
let state = self.state.lock().await;
|
||||
(
|
||||
@@ -2813,6 +2813,7 @@ impl Session {
|
||||
state.session_configuration.collaboration_mode.clone(),
|
||||
state.session_configuration.base_instructions.clone(),
|
||||
state.session_configuration.session_source.clone(),
|
||||
state.auto_compact_window_id(),
|
||||
)
|
||||
};
|
||||
if let Some(model_switch_message) =
|
||||
@@ -2962,6 +2963,18 @@ impl Session {
|
||||
.render(),
|
||||
);
|
||||
}
|
||||
// This is full-context metadata. Steady-state context diffs should not re-emit it.
|
||||
if turn_context.features.enabled(Feature::TokenBudget)
|
||||
&& let Some(model_context_window) = turn_context.model_context_window()
|
||||
{
|
||||
developer_sections.push(
|
||||
crate::context::TokenBudgetContext::new(
|
||||
auto_compact_window_id,
|
||||
model_context_window,
|
||||
)
|
||||
.render(),
|
||||
);
|
||||
}
|
||||
if turn_context.config.include_environment_context {
|
||||
let shell = self.user_shell();
|
||||
let subagents = self
|
||||
@@ -3040,7 +3053,7 @@ impl Session {
|
||||
format!("{thread_id}:{window_id}")
|
||||
}
|
||||
|
||||
async fn advance_auto_compact_window_id(&self) -> u64 {
|
||||
pub(crate) async fn advance_auto_compact_window_id(&self) -> u64 {
|
||||
let mut state = self.state.lock().await;
|
||||
state.advance_auto_compact_window_id()
|
||||
}
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
use super::session::Session;
|
||||
use super::turn_context::TurnContext;
|
||||
use crate::context::ContextualUserFragment;
|
||||
use codex_features::Feature;
|
||||
|
||||
const TOKEN_BUDGET_USAGE_THRESHOLDS: [i64; 3] = [25, 50, 75];
|
||||
|
||||
pub(super) async fn maybe_record_token_budget_remaining_context(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
tokens_before_sampling: i64,
|
||||
tokens_after_sampling: i64,
|
||||
) {
|
||||
if !turn_context.features.enabled(Feature::TokenBudget) {
|
||||
return;
|
||||
}
|
||||
let Some(model_context_window) = turn_context.model_context_window() else {
|
||||
return;
|
||||
};
|
||||
if model_context_window <= 0 || tokens_after_sampling <= tokens_before_sampling {
|
||||
return;
|
||||
}
|
||||
|
||||
let tokens_before_sampling = tokens_before_sampling.max(0);
|
||||
let tokens_after_sampling = tokens_after_sampling.max(0);
|
||||
let crossed_threshold = TOKEN_BUDGET_USAGE_THRESHOLDS.iter().any(|threshold| {
|
||||
tokens_before_sampling.saturating_mul(100) < model_context_window.saturating_mul(*threshold)
|
||||
&& tokens_after_sampling.saturating_mul(100)
|
||||
>= model_context_window.saturating_mul(*threshold)
|
||||
});
|
||||
if !crossed_threshold {
|
||||
return;
|
||||
}
|
||||
|
||||
let tokens_left = model_context_window
|
||||
.saturating_sub(tokens_after_sampling)
|
||||
.max(0);
|
||||
|
||||
let response_item = ContextualUserFragment::into(
|
||||
crate::context::TokenBudgetRemainingContext::new(tokens_left),
|
||||
);
|
||||
sess.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
|
||||
.await;
|
||||
}
|
||||
@@ -225,6 +225,7 @@ pub(crate) async fn run_turn(
|
||||
let turn_metadata_header = turn_context
|
||||
.turn_metadata_state
|
||||
.current_header_value_for_model_request(&window_id);
|
||||
let tokens_before_sampling = sess.get_total_token_usage().await;
|
||||
match run_sampling_request(
|
||||
Arc::clone(&sess),
|
||||
Arc::clone(&turn_context),
|
||||
@@ -275,6 +276,15 @@ pub(crate) async fn run_turn(
|
||||
"post sampling token usage"
|
||||
);
|
||||
|
||||
let tokens_after_sampling = token_status.active_context_tokens;
|
||||
super::token_budget::maybe_record_token_budget_remaining_context(
|
||||
sess.as_ref(),
|
||||
turn_context.as_ref(),
|
||||
tokens_before_sampling,
|
||||
tokens_after_sampling,
|
||||
)
|
||||
.await;
|
||||
|
||||
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
|
||||
if token_limit_reached && needs_follow_up {
|
||||
if let Err(err) = run_auto_compact(
|
||||
|
||||
@@ -110,6 +110,7 @@ mod sqlite_state;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
mod subagent_notifications;
|
||||
mod token_budget;
|
||||
mod tool_harness;
|
||||
mod tool_parallelism;
|
||||
mod tools;
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
use anyhow::Result;
|
||||
use codex_features::Feature;
|
||||
use codex_model_provider_info::built_in_model_providers;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use core_test_support::PathBufExt;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::local;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
const CONFIGURED_CONTEXT_WINDOW: i64 = 128_000;
|
||||
const EFFECTIVE_CONTEXT_WINDOW: i64 = CONFIGURED_CONTEXT_WINDOW * 95 / 100;
|
||||
|
||||
fn token_budget_texts(request: &ResponsesRequest) -> Vec<String> {
|
||||
request
|
||||
.message_input_texts("developer")
|
||||
.into_iter()
|
||||
.filter(|text| text.starts_with("<token_budget>"))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn token_budget_context_is_only_emitted_with_full_context() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
||||
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
let test = test_codex()
|
||||
.with_config(|config| {
|
||||
config.model_context_window = Some(CONFIGURED_CONTEXT_WINDOW);
|
||||
config
|
||||
.features
|
||||
.enable(Feature::TokenBudget)
|
||||
.expect("test config should allow token budget");
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
test.submit_turn("first turn").await?;
|
||||
|
||||
let second_cwd = test.workspace_path("second-cwd");
|
||||
std::fs::create_dir_all(&second_cwd)?;
|
||||
test.submit_turn_with_environments("second turn", Some(vec![local(second_cwd.abs())]))
|
||||
.await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
|
||||
let expected = vec![format!(
|
||||
"<token_budget>\nCurrent context window 0.\nYou have {EFFECTIVE_CONTEXT_WINDOW} tokens left in this context window.\n</token_budget>"
|
||||
)];
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[0]),
|
||||
expected,
|
||||
"initial full context should report context window 0"
|
||||
);
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[1]),
|
||||
expected,
|
||||
"steady-state context update should not advance the context window"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn token_budget_remaining_context_emits_on_first_threshold_crossing() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_completed_with_tokens("resp-1", /*total_tokens*/ 2_500),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_completed_with_tokens("resp-2", /*total_tokens*/ 3_000),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_completed_with_tokens("resp-3", /*total_tokens*/ 5_000),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-4"),
|
||||
ev_completed_with_tokens("resp-4", /*total_tokens*/ 8_000),
|
||||
]),
|
||||
sse(vec![ev_response_created("resp-5"), ev_completed("resp-5")]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
let test = test_codex()
|
||||
.with_config(|config| {
|
||||
config.model_context_window = Some(10_000);
|
||||
config
|
||||
.features
|
||||
.enable(Feature::TokenBudget)
|
||||
.expect("test config should allow token budget");
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
for turn in 1..=5 {
|
||||
test.submit_turn(&format!("turn {turn}")).await?;
|
||||
}
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 5);
|
||||
|
||||
let full_context = "<token_budget>\nCurrent context window 0.\nYou have 9500 tokens left in this context window.\n</token_budget>"
|
||||
.to_string();
|
||||
let threshold_25 =
|
||||
"<token_budget>\nYou have 7000 tokens left in this context window.\n</token_budget>"
|
||||
.to_string();
|
||||
let threshold_50 =
|
||||
"<token_budget>\nYou have 4500 tokens left in this context window.\n</token_budget>"
|
||||
.to_string();
|
||||
let threshold_75 =
|
||||
"<token_budget>\nYou have 1500 tokens left in this context window.\n</token_budget>"
|
||||
.to_string();
|
||||
|
||||
assert_eq!(token_budget_texts(&requests[0]), vec![full_context.clone()]);
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[1]),
|
||||
vec![full_context.clone(), threshold_25.clone()]
|
||||
);
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[2]),
|
||||
vec![full_context.clone(), threshold_25.clone()]
|
||||
);
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[3]),
|
||||
vec![
|
||||
full_context.clone(),
|
||||
threshold_25.clone(),
|
||||
threshold_50.clone()
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[4]),
|
||||
vec![full_context, threshold_25, threshold_50, threshold_75]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn token_budget_context_uses_new_window_after_compaction() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-compact"),
|
||||
ev_assistant_message("msg-compact", "compact summary"),
|
||||
ev_completed("resp-compact"),
|
||||
]),
|
||||
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut model_provider = built_in_model_providers(/*openai_base_url*/ None)["openai"].clone();
|
||||
model_provider.name = "OpenAI-compatible test provider".to_string();
|
||||
model_provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
model_provider.supports_websockets = false;
|
||||
|
||||
let test = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
config.model_context_window = Some(CONFIGURED_CONTEXT_WINDOW);
|
||||
config
|
||||
.features
|
||||
.enable(Feature::TokenBudget)
|
||||
.expect("test config should allow token budget");
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
test.submit_turn("before compact").await?;
|
||||
test.codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
test.submit_turn("after compact").await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
token_budget_texts(&requests[2]),
|
||||
vec![format!(
|
||||
"<token_budget>\nCurrent context window 1.\nYou have {EFFECTIVE_CONTEXT_WINDOW} tokens left in this context window.\n</token_budget>"
|
||||
)],
|
||||
"post-compaction full context should report context window 1"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -197,6 +197,8 @@ pub enum Feature {
|
||||
GuardianApproval,
|
||||
/// Enable persisted thread goals and automatic goal continuation.
|
||||
Goals,
|
||||
/// Add current context-window metadata to model-visible context.
|
||||
TokenBudget,
|
||||
/// Route MCP tool approval prompts through the MCP elicitation request path.
|
||||
ToolCallMcpElicitation,
|
||||
/// Prompt Codex Apps connector auth failures through MCP URL elicitations.
|
||||
@@ -1142,6 +1144,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::TokenBudget,
|
||||
key: "token_budget",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::CollaborationModes,
|
||||
key: "collaboration_modes",
|
||||
|
||||
Reference in New Issue
Block a user