[codex] simplify token budget context (#29295)

## Why

The token-budget feature currently adds remaining-token messages
whenever usage crosses the 25%, 50%, and 75% thresholds. Those periodic
inserts create prompt churn without requiring action, while the
near-compaction reminder and explicit `get_context_remaining` tool
already cover actionable and on-demand budget information.

The context-window lineage block is also easier to scan as plain labeled
text than as a `<token_budget>`-wrapped fragment.

## What changed

- Stop recording automatic remaining-token messages at percentage
thresholds.
- Render context-window lineage in `First`, `Current`, `Previous` order
with colon-separated labels.
- Omit the `Previous` line for the first context window.
- Remove `<token_budget>` wrappers from newly rendered lineage,
near-compaction reminders, and `get_context_remaining` output.
- Keep recognizing legacy wrapped fragments so existing rollouts remain
compatible.
- Remove the post-sampling token snapshot that was only needed by the
periodic threshold path.

## Testing

- `just test -p codex-core token_budget` (11 tests passed)
This commit is contained in:
pakrym-oai
2026-06-20 21:50:09 -07:00
committed by GitHub
Unverified
parent 6df037d47f
commit aaf737fa59
7 changed files with 65 additions and 195 deletions
@@ -39,24 +39,25 @@ impl ContextualUserFragment for TokenBudgetContext {
}
fn type_markers() -> (&'static str, &'static str) {
("<token_budget>\n", "\n</token_budget>")
("", "")
}
fn body(&self) -> String {
let thread_id = self.thread_id;
let first_window_id = self.first_window_id;
let previous_window_id = self
.previous_window_id
.map_or_else(|| "none".to_string(), |window_id| window_id.to_string());
let window_id = self.window_id;
let mcp_result = self
.mcp_result
.as_deref()
.map(|result| format!("\n{result}"))
.unwrap_or_default();
format!(
"Thread id {thread_id}.\nFirst context window id {first_window_id}.\nPrevious context window id {previous_window_id}.\nCurrent context window id {window_id}.{mcp_result}"
)
let mut lines = vec![
format!("Thread id {thread_id}."),
format!("First context window id: {first_window_id}"),
format!("Current context window id: {window_id}"),
];
if let Some(previous_window_id) = self.previous_window_id {
lines.push(format!("Previous context window id: {previous_window_id}"));
}
if let Some(mcp_result) = &self.mcp_result {
lines.push(mcp_result.clone());
}
lines.join("\n")
}
}
@@ -87,7 +88,7 @@ impl ContextualUserFragment for TokenBudgetRemainingContext {
}
fn type_markers() -> (&'static str, &'static str) {
("<token_budget>\n", "\n</token_budget>")
("", "")
}
fn body(&self) -> String {
@@ -123,7 +124,7 @@ impl ContextualUserFragment for TokenBudgetReminder {
}
fn type_markers() -> (&'static str, &'static str) {
("<token_budget>\n", "\n</token_budget>")
("", "")
}
fn body(&self) -> String {
+1
View File
@@ -34,6 +34,7 @@ const CONTEXTUAL_DEVELOPER_PREFIXES: &[&str] = &[
REALTIME_CONVERSATION_OPEN_TAG,
SKILLS_INSTRUCTIONS_OPEN_TAG,
"<personality_spec>",
// Keep recognizing token-budget wrappers persisted by older versions.
"<token_budget>",
"<rollout_budget>",
];
+1 -1
View File
@@ -29,7 +29,7 @@ fn recognizes_skills_instructions_as_contextual_developer_content() {
}
#[test]
fn recognizes_token_budget_as_contextual_developer_content() {
fn recognizes_legacy_token_budget_as_contextual_developer_content() {
let content = vec![ContentItem::InputText {
text: "<token_budget>\nYou have 710 tokens left in this context window.\n</token_budget>"
.to_string(),
+17 -45
View File
@@ -3,63 +3,35 @@ use super::turn_context::TurnContext;
use crate::context::ContextualUserFragment;
use codex_features::Feature;
const TOKEN_BUDGET_USAGE_THRESHOLDS: [i64; 3] = [25, 50, 75];
pub(super) async fn maybe_record(
sess: &Session,
turn_context: &TurnContext,
tokens_before_sampling: i64,
tokens_after_sampling: i64,
tokens_until_compaction: i64,
) {
if !turn_context.config.features.enabled(Feature::TokenBudget) {
return;
}
let mut response_items = Vec::new();
if let Some(model_context_window) = turn_context.model_context_window()
&& model_context_window > 0
&& tokens_after_sampling > tokens_before_sampling
{
let tokens_before_sampling = tokens_before_sampling.max(0);
let tokens_after_sampling = tokens_after_sampling.max(0);
let crossed_threshold = TOKEN_BUDGET_USAGE_THRESHOLDS.iter().any(|threshold| {
tokens_before_sampling.saturating_mul(100)
< model_context_window.saturating_mul(*threshold)
&& tokens_after_sampling.saturating_mul(100)
>= model_context_window.saturating_mul(*threshold)
});
if crossed_threshold {
let tokens_left = model_context_window
.saturating_sub(tokens_after_sampling)
.max(0);
response_items.push(ContextualUserFragment::into(
crate::context::TokenBudgetRemainingContext::new(tokens_left),
));
}
}
if let Some(config) = turn_context.config.token_budget.as_ref().filter(|config| {
let Some(config) = turn_context.config.token_budget.as_ref().filter(|config| {
config
.reminder_threshold_tokens
.is_some_and(|threshold| tokens_until_compaction <= threshold)
}) {
let reminder_due = {
let mut state = sess.state.lock().await;
state.claim_token_budget_reminder()
};
if reminder_due {
response_items.push(ContextualUserFragment::into(
crate::context::TokenBudgetReminder::new(
&config.reminder_message_template,
tokens_until_compaction,
),
));
}
}) else {
return;
};
let reminder_due = {
let mut state = sess.state.lock().await;
state.claim_token_budget_reminder()
};
if !reminder_due {
return;
}
if !response_items.is_empty() {
sess.record_conversation_items(turn_context, &response_items)
.await;
}
let response_item = ContextualUserFragment::into(crate::context::TokenBudgetReminder::new(
&config.reminder_message_template,
tokens_until_compaction,
));
sess.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
.await;
}
+3 -12
View File
@@ -254,8 +254,7 @@ pub(crate) async fn run_turn(
window_id,
CodexResponsesRequestKind::Turn,
);
let tokens_before_sampling = sess.get_total_token_usage().await;
let (sampling_request_output, sampling_request_input) = run_sampling_request(
run_sampling_request(
Arc::clone(&sess),
Arc::clone(&turn_context),
Arc::clone(&turn_extension_data),
@@ -265,17 +264,11 @@ pub(crate) async fn run_turn(
sampling_request_input,
cancellation_token.child_token(),
)
.await?;
Ok((
tokens_before_sampling,
sampling_request_output,
sampling_request_input,
))
.await
}
.await;
match sampling_request_result {
Ok((tokens_before_sampling, sampling_request_output, sampling_request_input)) => {
Ok((sampling_request_output, sampling_request_input)) => {
let SamplingRequestResult {
needs_follow_up: model_needs_follow_up,
last_agent_message: sampling_request_last_agent_message,
@@ -326,8 +319,6 @@ pub(crate) async fn run_turn(
super::token_budget::maybe_record(
sess.as_ref(),
turn_context.as_ref(),
tokens_before_sampling,
tokens_after_sampling,
tokens_until_compaction,
)
.await;
@@ -9,7 +9,7 @@ Scenario: New context window tool installs fresh full context before the next fo
00:message/developer[3]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
[03] <token_budget>\nThread id <THREAD_ID>.\nFirst context window id <FIRST_WINDOW_ID>.\nPrevious context window id <FIRST_WINDOW_ID>.\nCurrent context window id <WINDOW_ID>.\n</token_budget>
[03] Thread id <THREAD_ID>.\nFirst context window id: <FIRST_WINDOW_ID>\nCurrent context window id: <WINDOW_ID>\nPrevious context window id: <FIRST_WINDOW_ID>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:function_call/update_plan
03:function_call_output:Plan updated
+27 -122
View File
@@ -33,11 +33,11 @@ use std::time::Duration;
const CONFIGURED_CONTEXT_WINDOW: i64 = 128_000;
fn token_budget_texts(request: &ResponsesRequest) -> Vec<String> {
fn token_budget_contexts(request: &ResponsesRequest) -> Vec<String> {
request
.message_input_texts("developer")
.into_iter()
.filter(|text| text.starts_with("<token_budget>"))
.filter(|text| text.starts_with("Thread id "))
.collect()
}
@@ -47,7 +47,7 @@ fn token_budget_window_ids(
) -> (String, Option<String>, String) {
let captures = assert_regex_match(
&format!(
r"^<token_budget>\nThread id {thread_id}\.\nFirst context window id ([0-9a-f-]{{36}})\.\nPrevious context window id (none|[0-9a-f-]{{36}})\.\nCurrent context window id ([0-9a-f-]{{36}})\.\n</token_budget>$"
r"^Thread id {thread_id}\.\nFirst context window id: ([0-9a-f-]{{36}})\nCurrent context window id: ([0-9a-f-]{{36}})(?:\nPrevious context window id: ([0-9a-f-]{{36}}))?$"
),
text,
);
@@ -56,16 +56,12 @@ fn token_budget_window_ids(
.expect("first window id capture")
.as_str()
.to_string();
let previous_window_id = captures
.get(2)
.expect("previous window id capture")
.as_str();
let previous_window_id = (previous_window_id != "none").then(|| previous_window_id.to_string());
let window_id = captures
.get(3)
.get(2)
.expect("window id capture")
.as_str()
.to_string();
let previous_window_id = captures.get(3).map(|capture| capture.as_str().to_string());
(first_window_id, previous_window_id, window_id)
}
@@ -115,14 +111,14 @@ async fn token_budget_context_is_only_emitted_with_full_context() -> Result<()>
assert_eq!(requests.len(), 2);
let thread_id = test.session_configured.thread_id;
let initial_token_budget = token_budget_texts(&requests[0]);
let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (first_window_id, previous_window_id, window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
assert_eq!(previous_window_id, None);
assert_eq!(first_window_id, window_id);
assert_eq!(
token_budget_texts(&requests[1]),
token_budget_contexts(&requests[1]),
initial_token_budget,
"steady-state context update should not advance the context window"
);
@@ -191,11 +187,11 @@ async fn token_budget_context_injects_plain_thread_hint_text() -> Result<()> {
let request = responses.single_request();
let thread_id = test.session_configured.thread_id;
let token_budgets = token_budget_texts(&request);
let token_budgets = token_budget_contexts(&request);
assert_eq!(token_budgets.len(), 1);
let captures = assert_regex_match(
&format!(
r"^<token_budget>\nThread id {thread_id}\.\nFirst context window id ([0-9a-f-]{{36}})\.\nPrevious context window id none\.\nCurrent context window id ([0-9a-f-]{{36}})\.\nmanual history hint for thread {thread_id}\nunstructured notes/thread_hint fixture result\n</token_budget>$"
r"^Thread id {thread_id}\.\nFirst context window id: ([0-9a-f-]{{36}})\nCurrent context window id: ([0-9a-f-]{{36}})\nmanual history hint for thread {thread_id}\nunstructured notes/thread_hint fixture result$"
),
&token_budgets[0],
);
@@ -213,92 +209,6 @@ async fn token_budget_context_injects_plain_thread_hint_text() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn token_budget_remaining_context_emits_on_first_threshold_crossing() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let responses = mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-1"),
ev_completed_with_tokens("resp-1", /*total_tokens*/ 2_500),
]),
sse(vec![
ev_response_created("resp-2"),
ev_completed_with_tokens("resp-2", /*total_tokens*/ 3_000),
]),
sse(vec![
ev_response_created("resp-3"),
ev_completed_with_tokens("resp-3", /*total_tokens*/ 5_000),
]),
sse(vec![
ev_response_created("resp-4"),
ev_completed_with_tokens("resp-4", /*total_tokens*/ 8_000),
]),
sse(vec![ev_response_created("resp-5"), ev_completed("resp-5")]),
],
)
.await;
let test = test_codex()
.with_config(|config| {
config.model_context_window = Some(10_000);
config
.features
.enable(Feature::TokenBudget)
.expect("test config should allow token budget");
})
.build(&server)
.await?;
for turn in 1..=5 {
test.submit_turn(&format!("turn {turn}")).await?;
}
let requests = responses.requests();
assert_eq!(requests.len(), 5);
let thread_id = test.session_configured.thread_id;
let full_context = token_budget_texts(&requests[0]);
assert_eq!(full_context.len(), 1);
token_budget_window_ids(&full_context[0], thread_id);
let full_context = full_context[0].clone();
let threshold_25 =
"<token_budget>\nYou have 7000 tokens left in this context window.\n</token_budget>"
.to_string();
let threshold_50 =
"<token_budget>\nYou have 4500 tokens left in this context window.\n</token_budget>"
.to_string();
let threshold_75 =
"<token_budget>\nYou have 1500 tokens left in this context window.\n</token_budget>"
.to_string();
assert_eq!(token_budget_texts(&requests[0]), vec![full_context.clone()]);
assert_eq!(
token_budget_texts(&requests[1]),
vec![full_context.clone(), threshold_25.clone()]
);
assert_eq!(
token_budget_texts(&requests[2]),
vec![full_context.clone(), threshold_25.clone()]
);
assert_eq!(
token_budget_texts(&requests[3]),
vec![
full_context.clone(),
threshold_25.clone(),
threshold_50.clone()
]
);
assert_eq!(
token_budget_texts(&requests[4]),
vec![full_context, threshold_25, threshold_50, threshold_75]
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn token_budget_reminder_emits_after_crossing_compaction_threshold() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -335,17 +245,18 @@ async fn token_budget_reminder_emits_after_crossing_compaction_threshold() -> Re
let requests = responses.requests();
assert_eq!(requests.len(), 2);
let initial_context = token_budget_texts(&requests[0]);
let initial_context = token_budget_contexts(&requests[0]);
assert_eq!(initial_context.len(), 1);
let remaining_context =
"<token_budget>\nYou have 1500 tokens left in this context window.\n</token_budget>"
.to_string();
let reminder = "<token_budget>\nYour context window is nearly exhausted (only 1000 tokens remaining) and will be automatically reset for you soon. Once reset, message items in current context window will be cleared in the new window, but notes and history items will be persistent across windows.\n</token_budget>"
.to_string();
let reminder = "Your context window is nearly exhausted (only 1000 tokens remaining) and will be automatically reset for you soon. Once reset, message items in current context window will be cleared in the new window, but notes and history items will be persistent across windows.";
assert_eq!(
token_budget_texts(&requests[1]),
vec![initial_context[0].clone(), remaining_context, reminder]
requests[1]
.message_input_texts("developer")
.into_iter()
.filter(|text| text == reminder)
.count(),
1
);
assert_eq!(token_budget_contexts(&requests[1]), initial_context);
Ok(())
}
@@ -401,13 +312,10 @@ async fn get_context_remaining_returns_token_budget_remaining_fragment() -> Resu
);
let thread_id = test.session_configured.thread_id;
let remaining_context =
"<token_budget>\nYou have 7000 tokens left in this context window.\n</token_budget>"
.to_string();
let token_budgets = token_budget_texts(&requests[1]);
assert_eq!(token_budgets.len(), 2);
let remaining_context = "You have 7000 tokens left in this context window.".to_string();
let token_budgets = token_budget_contexts(&requests[1]);
assert_eq!(token_budgets.len(), 1);
token_budget_window_ids(&token_budgets[0], thread_id);
assert_eq!(token_budgets[1], remaining_context);
assert_eq!(
requests[2].function_call_output_content_and_success(call_id),
Some((Some(remaining_context), None))
@@ -464,14 +372,11 @@ async fn get_context_remaining_returns_unknown_when_window_is_unavailable() -> R
"get_context_remaining should be exposed when token budget is enabled"
);
assert_eq!(token_budget_texts(&requests[0]), Vec::<String>::new());
assert_eq!(token_budget_contexts(&requests[0]), Vec::<String>::new());
assert_eq!(
requests[1].function_call_output_content_and_success(call_id),
Some((
Some(
"<token_budget>\nYou have unknown tokens left in this context window.\n</token_budget>"
.to_string()
),
Some("You have unknown tokens left in this context window.".to_string()),
None,
))
);
@@ -527,11 +432,11 @@ async fn token_budget_context_uses_new_window_after_compaction() -> Result<()> {
assert_eq!(requests.len(), 3);
let thread_id = test.session_configured.thread_id;
let initial_token_budget = token_budget_texts(&requests[0]);
let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (initial_first_window_id, initial_previous_window_id, initial_window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
let post_compaction_token_budget = token_budget_texts(&requests[2]);
let post_compaction_token_budget = token_budget_contexts(&requests[2]);
assert_eq!(post_compaction_token_budget.len(), 1);
let (
post_compaction_first_window_id,
@@ -606,11 +511,11 @@ async fn new_context_tool_starts_new_window_before_follow_up() -> Result<()> {
"new_context should be exposed when token budget is enabled"
);
let thread_id = test.session_configured.thread_id;
let initial_token_budget = token_budget_texts(&requests[0]);
let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (initial_first_window_id, _, initial_window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
let new_window_token_budget = token_budget_texts(&requests[2]);
let new_window_token_budget = token_budget_contexts(&requests[2]);
assert_eq!(new_window_token_budget.len(), 1);
let (new_first_window_id, new_previous_window_id, new_window_id) =
token_budget_window_ids(&new_window_token_budget[0], thread_id);