diff --git a/codex-rs/core/src/context/token_budget_context.rs b/codex-rs/core/src/context/token_budget_context.rs
index 93d5e1e6a..55a94b2b1 100644
--- a/codex-rs/core/src/context/token_budget_context.rs
+++ b/codex-rs/core/src/context/token_budget_context.rs
@@ -39,24 +39,25 @@ impl ContextualUserFragment for TokenBudgetContext {
}
fn type_markers() -> (&'static str, &'static str) {
- ("\n", "\n")
+ ("", "")
}
fn body(&self) -> String {
let thread_id = self.thread_id;
let first_window_id = self.first_window_id;
- let previous_window_id = self
- .previous_window_id
- .map_or_else(|| "none".to_string(), |window_id| window_id.to_string());
let window_id = self.window_id;
- let mcp_result = self
- .mcp_result
- .as_deref()
- .map(|result| format!("\n{result}"))
- .unwrap_or_default();
- format!(
- "Thread id {thread_id}.\nFirst context window id {first_window_id}.\nPrevious context window id {previous_window_id}.\nCurrent context window id {window_id}.{mcp_result}"
- )
+ let mut lines = vec![
+ format!("Thread id {thread_id}."),
+ format!("First context window id: {first_window_id}"),
+ format!("Current context window id: {window_id}"),
+ ];
+ if let Some(previous_window_id) = self.previous_window_id {
+ lines.push(format!("Previous context window id: {previous_window_id}"));
+ }
+ if let Some(mcp_result) = &self.mcp_result {
+ lines.push(mcp_result.clone());
+ }
+ lines.join("\n")
}
}
@@ -87,7 +88,7 @@ impl ContextualUserFragment for TokenBudgetRemainingContext {
}
fn type_markers() -> (&'static str, &'static str) {
- ("\n", "\n")
+ ("", "")
}
fn body(&self) -> String {
@@ -123,7 +124,7 @@ impl ContextualUserFragment for TokenBudgetReminder {
}
fn type_markers() -> (&'static str, &'static str) {
- ("\n", "\n")
+ ("", "")
}
fn body(&self) -> String {
diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs
index 1ca097e78..4c57c5cad 100644
--- a/codex-rs/core/src/event_mapping.rs
+++ b/codex-rs/core/src/event_mapping.rs
@@ -34,6 +34,7 @@ const CONTEXTUAL_DEVELOPER_PREFIXES: &[&str] = &[
REALTIME_CONVERSATION_OPEN_TAG,
SKILLS_INSTRUCTIONS_OPEN_TAG,
"",
+ // Keep recognizing token-budget wrappers persisted by older versions.
"",
"",
];
diff --git a/codex-rs/core/src/event_mapping_tests.rs b/codex-rs/core/src/event_mapping_tests.rs
index b7453c902..322fe7d00 100644
--- a/codex-rs/core/src/event_mapping_tests.rs
+++ b/codex-rs/core/src/event_mapping_tests.rs
@@ -29,7 +29,7 @@ fn recognizes_skills_instructions_as_contextual_developer_content() {
}
#[test]
-fn recognizes_token_budget_as_contextual_developer_content() {
+fn recognizes_legacy_token_budget_as_contextual_developer_content() {
let content = vec![ContentItem::InputText {
text: "\nYou have 710 tokens left in this context window.\n"
.to_string(),
diff --git a/codex-rs/core/src/session/token_budget.rs b/codex-rs/core/src/session/token_budget.rs
index 017b7d542..17f27c106 100644
--- a/codex-rs/core/src/session/token_budget.rs
+++ b/codex-rs/core/src/session/token_budget.rs
@@ -3,63 +3,35 @@ use super::turn_context::TurnContext;
use crate::context::ContextualUserFragment;
use codex_features::Feature;
-const TOKEN_BUDGET_USAGE_THRESHOLDS: [i64; 3] = [25, 50, 75];
-
pub(super) async fn maybe_record(
sess: &Session,
turn_context: &TurnContext,
- tokens_before_sampling: i64,
- tokens_after_sampling: i64,
tokens_until_compaction: i64,
) {
if !turn_context.config.features.enabled(Feature::TokenBudget) {
return;
}
- let mut response_items = Vec::new();
- if let Some(model_context_window) = turn_context.model_context_window()
- && model_context_window > 0
- && tokens_after_sampling > tokens_before_sampling
- {
- let tokens_before_sampling = tokens_before_sampling.max(0);
- let tokens_after_sampling = tokens_after_sampling.max(0);
- let crossed_threshold = TOKEN_BUDGET_USAGE_THRESHOLDS.iter().any(|threshold| {
- tokens_before_sampling.saturating_mul(100)
- < model_context_window.saturating_mul(*threshold)
- && tokens_after_sampling.saturating_mul(100)
- >= model_context_window.saturating_mul(*threshold)
- });
- if crossed_threshold {
- let tokens_left = model_context_window
- .saturating_sub(tokens_after_sampling)
- .max(0);
- response_items.push(ContextualUserFragment::into(
- crate::context::TokenBudgetRemainingContext::new(tokens_left),
- ));
- }
- }
-
- if let Some(config) = turn_context.config.token_budget.as_ref().filter(|config| {
+ let Some(config) = turn_context.config.token_budget.as_ref().filter(|config| {
config
.reminder_threshold_tokens
.is_some_and(|threshold| tokens_until_compaction <= threshold)
- }) {
- let reminder_due = {
- let mut state = sess.state.lock().await;
- state.claim_token_budget_reminder()
- };
- if reminder_due {
- response_items.push(ContextualUserFragment::into(
- crate::context::TokenBudgetReminder::new(
- &config.reminder_message_template,
- tokens_until_compaction,
- ),
- ));
- }
+ }) else {
+ return;
+ };
+
+ let reminder_due = {
+ let mut state = sess.state.lock().await;
+ state.claim_token_budget_reminder()
+ };
+ if !reminder_due {
+ return;
}
- if !response_items.is_empty() {
- sess.record_conversation_items(turn_context, &response_items)
- .await;
- }
+ let response_item = ContextualUserFragment::into(crate::context::TokenBudgetReminder::new(
+ &config.reminder_message_template,
+ tokens_until_compaction,
+ ));
+ sess.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
+ .await;
}
diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index df74841b1..a35d9b60c 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -254,8 +254,7 @@ pub(crate) async fn run_turn(
window_id,
CodexResponsesRequestKind::Turn,
);
- let tokens_before_sampling = sess.get_total_token_usage().await;
- let (sampling_request_output, sampling_request_input) = run_sampling_request(
+ run_sampling_request(
Arc::clone(&sess),
Arc::clone(&turn_context),
Arc::clone(&turn_extension_data),
@@ -265,17 +264,11 @@ pub(crate) async fn run_turn(
sampling_request_input,
cancellation_token.child_token(),
)
- .await?;
-
- Ok((
- tokens_before_sampling,
- sampling_request_output,
- sampling_request_input,
- ))
+ .await
}
.await;
match sampling_request_result {
- Ok((tokens_before_sampling, sampling_request_output, sampling_request_input)) => {
+ Ok((sampling_request_output, sampling_request_input)) => {
let SamplingRequestResult {
needs_follow_up: model_needs_follow_up,
last_agent_message: sampling_request_last_agent_message,
@@ -326,8 +319,6 @@ pub(crate) async fn run_turn(
super::token_budget::maybe_record(
sess.as_ref(),
turn_context.as_ref(),
- tokens_before_sampling,
- tokens_after_sampling,
tokens_until_compaction,
)
.await;
diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__token_budget__token_budget_new_context_window_tool_full_context.snap b/codex-rs/core/tests/suite/snapshots/all__suite__token_budget__token_budget_new_context_window_tool_full_context.snap
index e5dd60717..c0c905625 100644
--- a/codex-rs/core/tests/suite/snapshots/all__suite__token_budget__token_budget_new_context_window_tool_full_context.snap
+++ b/codex-rs/core/tests/suite/snapshots/all__suite__token_budget__token_budget_new_context_window_tool_full_context.snap
@@ -9,7 +9,7 @@ Scenario: New context window tool installs fresh full context before the next fo
00:message/developer[3]:
[01]
[02]
- [03] \nThread id .\nFirst context window id .\nPrevious context window id .\nCurrent context window id .\n
+ [03] Thread id .\nFirst context window id: \nCurrent context window id: \nPrevious context window id:
01:message/user:>
02:function_call/update_plan
03:function_call_output:Plan updated
diff --git a/codex-rs/core/tests/suite/token_budget.rs b/codex-rs/core/tests/suite/token_budget.rs
index a1f8fb5de..4d448f9e7 100644
--- a/codex-rs/core/tests/suite/token_budget.rs
+++ b/codex-rs/core/tests/suite/token_budget.rs
@@ -33,11 +33,11 @@ use std::time::Duration;
const CONFIGURED_CONTEXT_WINDOW: i64 = 128_000;
-fn token_budget_texts(request: &ResponsesRequest) -> Vec {
+fn token_budget_contexts(request: &ResponsesRequest) -> Vec {
request
.message_input_texts("developer")
.into_iter()
- .filter(|text| text.starts_with(""))
+ .filter(|text| text.starts_with("Thread id "))
.collect()
}
@@ -47,7 +47,7 @@ fn token_budget_window_ids(
) -> (String, Option, String) {
let captures = assert_regex_match(
&format!(
- r"^\nThread id {thread_id}\.\nFirst context window id ([0-9a-f-]{{36}})\.\nPrevious context window id (none|[0-9a-f-]{{36}})\.\nCurrent context window id ([0-9a-f-]{{36}})\.\n$"
+ r"^Thread id {thread_id}\.\nFirst context window id: ([0-9a-f-]{{36}})\nCurrent context window id: ([0-9a-f-]{{36}})(?:\nPrevious context window id: ([0-9a-f-]{{36}}))?$"
),
text,
);
@@ -56,16 +56,12 @@ fn token_budget_window_ids(
.expect("first window id capture")
.as_str()
.to_string();
- let previous_window_id = captures
- .get(2)
- .expect("previous window id capture")
- .as_str();
- let previous_window_id = (previous_window_id != "none").then(|| previous_window_id.to_string());
let window_id = captures
- .get(3)
+ .get(2)
.expect("window id capture")
.as_str()
.to_string();
+ let previous_window_id = captures.get(3).map(|capture| capture.as_str().to_string());
(first_window_id, previous_window_id, window_id)
}
@@ -115,14 +111,14 @@ async fn token_budget_context_is_only_emitted_with_full_context() -> Result<()>
assert_eq!(requests.len(), 2);
let thread_id = test.session_configured.thread_id;
- let initial_token_budget = token_budget_texts(&requests[0]);
+ let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (first_window_id, previous_window_id, window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
assert_eq!(previous_window_id, None);
assert_eq!(first_window_id, window_id);
assert_eq!(
- token_budget_texts(&requests[1]),
+ token_budget_contexts(&requests[1]),
initial_token_budget,
"steady-state context update should not advance the context window"
);
@@ -191,11 +187,11 @@ async fn token_budget_context_injects_plain_thread_hint_text() -> Result<()> {
let request = responses.single_request();
let thread_id = test.session_configured.thread_id;
- let token_budgets = token_budget_texts(&request);
+ let token_budgets = token_budget_contexts(&request);
assert_eq!(token_budgets.len(), 1);
let captures = assert_regex_match(
&format!(
- r"^\nThread id {thread_id}\.\nFirst context window id ([0-9a-f-]{{36}})\.\nPrevious context window id none\.\nCurrent context window id ([0-9a-f-]{{36}})\.\nmanual history hint for thread {thread_id}\nunstructured notes/thread_hint fixture result\n$"
+ r"^Thread id {thread_id}\.\nFirst context window id: ([0-9a-f-]{{36}})\nCurrent context window id: ([0-9a-f-]{{36}})\nmanual history hint for thread {thread_id}\nunstructured notes/thread_hint fixture result$"
),
&token_budgets[0],
);
@@ -213,92 +209,6 @@ async fn token_budget_context_injects_plain_thread_hint_text() -> Result<()> {
Ok(())
}
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn token_budget_remaining_context_emits_on_first_threshold_crossing() -> Result<()> {
- skip_if_no_network!(Ok(()));
-
- let server = start_mock_server().await;
- let responses = mount_sse_sequence(
- &server,
- vec![
- sse(vec![
- ev_response_created("resp-1"),
- ev_completed_with_tokens("resp-1", /*total_tokens*/ 2_500),
- ]),
- sse(vec![
- ev_response_created("resp-2"),
- ev_completed_with_tokens("resp-2", /*total_tokens*/ 3_000),
- ]),
- sse(vec![
- ev_response_created("resp-3"),
- ev_completed_with_tokens("resp-3", /*total_tokens*/ 5_000),
- ]),
- sse(vec![
- ev_response_created("resp-4"),
- ev_completed_with_tokens("resp-4", /*total_tokens*/ 8_000),
- ]),
- sse(vec![ev_response_created("resp-5"), ev_completed("resp-5")]),
- ],
- )
- .await;
- let test = test_codex()
- .with_config(|config| {
- config.model_context_window = Some(10_000);
- config
- .features
- .enable(Feature::TokenBudget)
- .expect("test config should allow token budget");
- })
- .build(&server)
- .await?;
-
- for turn in 1..=5 {
- test.submit_turn(&format!("turn {turn}")).await?;
- }
-
- let requests = responses.requests();
- assert_eq!(requests.len(), 5);
-
- let thread_id = test.session_configured.thread_id;
- let full_context = token_budget_texts(&requests[0]);
- assert_eq!(full_context.len(), 1);
- token_budget_window_ids(&full_context[0], thread_id);
- let full_context = full_context[0].clone();
- let threshold_25 =
- "\nYou have 7000 tokens left in this context window.\n"
- .to_string();
- let threshold_50 =
- "\nYou have 4500 tokens left in this context window.\n"
- .to_string();
- let threshold_75 =
- "\nYou have 1500 tokens left in this context window.\n"
- .to_string();
-
- assert_eq!(token_budget_texts(&requests[0]), vec![full_context.clone()]);
- assert_eq!(
- token_budget_texts(&requests[1]),
- vec![full_context.clone(), threshold_25.clone()]
- );
- assert_eq!(
- token_budget_texts(&requests[2]),
- vec![full_context.clone(), threshold_25.clone()]
- );
- assert_eq!(
- token_budget_texts(&requests[3]),
- vec![
- full_context.clone(),
- threshold_25.clone(),
- threshold_50.clone()
- ]
- );
- assert_eq!(
- token_budget_texts(&requests[4]),
- vec![full_context, threshold_25, threshold_50, threshold_75]
- );
-
- Ok(())
-}
-
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn token_budget_reminder_emits_after_crossing_compaction_threshold() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -335,17 +245,18 @@ async fn token_budget_reminder_emits_after_crossing_compaction_threshold() -> Re
let requests = responses.requests();
assert_eq!(requests.len(), 2);
- let initial_context = token_budget_texts(&requests[0]);
+ let initial_context = token_budget_contexts(&requests[0]);
assert_eq!(initial_context.len(), 1);
- let remaining_context =
- "\nYou have 1500 tokens left in this context window.\n"
- .to_string();
- let reminder = "\nYour context window is nearly exhausted (only 1000 tokens remaining) and will be automatically reset for you soon. Once reset, message items in current context window will be cleared in the new window, but notes and history items will be persistent across windows.\n"
- .to_string();
+ let reminder = "Your context window is nearly exhausted (only 1000 tokens remaining) and will be automatically reset for you soon. Once reset, message items in current context window will be cleared in the new window, but notes and history items will be persistent across windows.";
assert_eq!(
- token_budget_texts(&requests[1]),
- vec![initial_context[0].clone(), remaining_context, reminder]
+ requests[1]
+ .message_input_texts("developer")
+ .into_iter()
+ .filter(|text| text == reminder)
+ .count(),
+ 1
);
+ assert_eq!(token_budget_contexts(&requests[1]), initial_context);
Ok(())
}
@@ -401,13 +312,10 @@ async fn get_context_remaining_returns_token_budget_remaining_fragment() -> Resu
);
let thread_id = test.session_configured.thread_id;
- let remaining_context =
- "\nYou have 7000 tokens left in this context window.\n"
- .to_string();
- let token_budgets = token_budget_texts(&requests[1]);
- assert_eq!(token_budgets.len(), 2);
+ let remaining_context = "You have 7000 tokens left in this context window.".to_string();
+ let token_budgets = token_budget_contexts(&requests[1]);
+ assert_eq!(token_budgets.len(), 1);
token_budget_window_ids(&token_budgets[0], thread_id);
- assert_eq!(token_budgets[1], remaining_context);
assert_eq!(
requests[2].function_call_output_content_and_success(call_id),
Some((Some(remaining_context), None))
@@ -464,14 +372,11 @@ async fn get_context_remaining_returns_unknown_when_window_is_unavailable() -> R
"get_context_remaining should be exposed when token budget is enabled"
);
- assert_eq!(token_budget_texts(&requests[0]), Vec::::new());
+ assert_eq!(token_budget_contexts(&requests[0]), Vec::::new());
assert_eq!(
requests[1].function_call_output_content_and_success(call_id),
Some((
- Some(
- "\nYou have unknown tokens left in this context window.\n"
- .to_string()
- ),
+ Some("You have unknown tokens left in this context window.".to_string()),
None,
))
);
@@ -527,11 +432,11 @@ async fn token_budget_context_uses_new_window_after_compaction() -> Result<()> {
assert_eq!(requests.len(), 3);
let thread_id = test.session_configured.thread_id;
- let initial_token_budget = token_budget_texts(&requests[0]);
+ let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (initial_first_window_id, initial_previous_window_id, initial_window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
- let post_compaction_token_budget = token_budget_texts(&requests[2]);
+ let post_compaction_token_budget = token_budget_contexts(&requests[2]);
assert_eq!(post_compaction_token_budget.len(), 1);
let (
post_compaction_first_window_id,
@@ -606,11 +511,11 @@ async fn new_context_tool_starts_new_window_before_follow_up() -> Result<()> {
"new_context should be exposed when token budget is enabled"
);
let thread_id = test.session_configured.thread_id;
- let initial_token_budget = token_budget_texts(&requests[0]);
+ let initial_token_budget = token_budget_contexts(&requests[0]);
assert_eq!(initial_token_budget.len(), 1);
let (initial_first_window_id, _, initial_window_id) =
token_budget_window_ids(&initial_token_budget[0], thread_id);
- let new_window_token_budget = token_budget_texts(&requests[2]);
+ let new_window_token_budget = token_budget_contexts(&requests[2]);
assert_eq!(new_window_token_budget.len(), 1);
let (new_first_window_id, new_previous_window_id, new_window_id) =
token_budget_window_ids(&new_window_token_budget[0], thread_id);