Stabilize memory Phase 2 input ordering (#19967)

## Why

Phase 2 still needs to choose the most relevant stage-1 memory outputs
by usage and recency, but exposing that ranking as the rendered
`raw_memories.md` order creates unnecessary large diff. Usage-count or
timestamp changes can reshuffle otherwise unchanged memories, making the
workspace diff noisy and giving the consolidation prompt a misleading
recency signal from file position.
This fix will reduce token consumption

## What Changed

- Keep the existing top-N Phase 2 selection ranking by `usage_count`,
`last_usage`, `source_updated_at`, and `thread_id`.
- Return the selected rows in stable ascending `thread_id` order before
syncing Phase 2 filesystem inputs.
- Update the memory README, raw memories header, and consolidation
prompt so they describe the stable order and tell the prompt to use
metadata and workspace diffs instead of file order as the recency
signal.
- Adjust the memory runtime tests to use deterministic thread IDs and
assert the stable return order separately from the ranked selection
semantics.

## Test Coverage

- Existing memory runtime tests in
`codex-rs/state/src/runtime/memories.rs` now cover the stable returned
ordering for Phase 2 inputs.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
jif-oai
2026-04-28 13:32:05 +02:00
committed by GitHub
Unverified
parent 54d1401170
commit fa127be25f
5 changed files with 85 additions and 56 deletions
+1 -1
View File
@@ -22,6 +22,6 @@ pub(super) fn usage_hint_text<'a>(
| SessionSource::Mcp
| SessionSource::Custom(_)
| SessionSource::Unknown => multi_agent_v2.root_agent_usage_hint_text.as_deref(),
SessionSource::SubAgent(_) => None,
SessionSource::Internal(_) | SessionSource::SubAgent(_) => None,
}
}
+5 -4
View File
@@ -94,7 +94,7 @@ What it does:
`last_usage` / `generated_at`
- computes a completion watermark from the claimed watermark + newest input timestamps
- syncs local memory artifacts under the memories root:
- `raw_memories.md` (merged raw memories, latest first)
- `raw_memories.md` (merged raw memories, stable ascending thread-id order)
- `rollout_summaries/` (one summary file per selected rollout)
- keeps the memories root itself as a git-baseline directory, initialized under
`~/.codex/memories/.git` by `codex-git-utils`
@@ -127,9 +127,10 @@ Selection and workspace-diff behavior:
- Phase 1 upserts preserve the previous `selected_for_phase2` baseline until
the next successful Phase 2 run rewrites it
- Phase 2 loads only the current top-N selected stage-1 inputs, syncs
`rollout_summaries/` and `raw_memories.md` directly to that selection, then
lets the git-style workspace diff surface additions, modifications, and
deletions against the previous successful memory baseline
`rollout_summaries/` directly to that selection, renders `raw_memories.md`
in stable ascending thread-id order to avoid usage-rank churn, then lets the
git-style workspace diff surface additions, modifications, and deletions
against the previous successful memory baseline
- when the selected input set is empty, stale `rollout_summaries/` files are
removed and `raw_memories.md` is rewritten to the empty-input placeholder;
consolidated outputs such as `MEMORY.md`, `memory_summary.md`, and `skills/`
+1 -1
View File
@@ -54,7 +54,7 @@ async fn rebuild_raw_memories_file(
return tokio::fs::write(raw_memories_file(root), body).await;
}
body.push_str("Merged stage-1 raw memories (latest first):\n\n");
body.push_str("Merged stage-1 raw memories (stable ascending thread-id order):\n\n");
for memory in retained {
writeln!(body, "## Thread `{}`", memory.thread_id).map_err(raw_memories_format_error)?;
writeln!(
@@ -120,11 +120,12 @@ Primary inputs (always read these, if exists):
Under `{{ memory_root }}/`:
- `raw_memories.md`
- mechanical merge of `raw_memories` from Phase 1; ordered latest-first.
- Use this recency ordering as a major heuristic when choosing what to promote, expand, or deprecate.
- Default scan order: top-to-bottom. In INCREMENTAL UPDATE mode, bias attention toward the newest
portion first, then expand to older entries with enough coverage to avoid missing important older
context.
- mechanical merge of selected `raw_memories` from Phase 1; ordered by stable ascending thread id.
- Do not treat file order as recency or importance; use `updated_at`, workspace diff context,
and rollout content when choosing what to promote, expand, or deprecate.
- Default scan order: top-to-bottom. In INCREMENTAL UPDATE mode, use the workspace diff to find
changed entries first, then expand to unchanged entries with enough coverage to avoid missing
important older context.
- source of rollout-level metadata needed for MEMORY.md `### rollout_summary_files`
annotations;
you should be able to find `cwd`, `rollout_path`, and `updated_at` there.
+72 -45
View File
@@ -335,9 +335,10 @@ WHERE thread_id IN (
/// `last_usage` is within `max_unused_days`, or whose
/// `source_updated_at` is within that window when the memory has never
/// been used
/// - eligible rows are ordered by `usage_count DESC`,
/// - eligible rows are ranked by `usage_count DESC`,
/// `COALESCE(last_usage, source_updated_at) DESC`, `source_updated_at DESC`,
/// `thread_id DESC`
/// - the selected top-N rows are returned in stable `thread_id ASC` order
///
/// The returned rows are the complete Phase 2 filesystem input. Phase 2
/// syncs these rows directly; deletions are represented by the workspace
@@ -355,30 +356,43 @@ WHERE thread_id IN (
let current_rows = sqlx::query(
r#"
SELECT
so.thread_id,
COALESCE(t.rollout_path, '') AS rollout_path,
so.source_updated_at,
so.raw_memory,
so.rollout_summary,
so.rollout_slug,
so.generated_at,
COALESCE(t.cwd, '') AS cwd,
t.git_branch AS git_branch
FROM stage1_outputs AS so
LEFT JOIN threads AS t
ON t.id = so.thread_id
WHERE t.memory_mode = 'enabled'
AND (length(trim(so.raw_memory)) > 0 OR length(trim(so.rollout_summary)) > 0)
AND (
(so.last_usage IS NOT NULL AND so.last_usage >= ?)
OR (so.last_usage IS NULL AND so.source_updated_at >= ?)
)
ORDER BY
COALESCE(so.usage_count, 0) DESC,
COALESCE(so.last_usage, so.source_updated_at) DESC,
so.source_updated_at DESC,
so.thread_id DESC
LIMIT ?
selected.thread_id,
selected.rollout_path,
selected.source_updated_at,
selected.raw_memory,
selected.rollout_summary,
selected.rollout_slug,
selected.generated_at,
selected.cwd,
selected.git_branch
FROM (
SELECT
so.thread_id,
COALESCE(t.rollout_path, '') AS rollout_path,
so.source_updated_at,
so.raw_memory,
so.rollout_summary,
so.rollout_slug,
so.generated_at,
COALESCE(t.cwd, '') AS cwd,
t.git_branch AS git_branch
FROM stage1_outputs AS so
LEFT JOIN threads AS t
ON t.id = so.thread_id
WHERE t.memory_mode = 'enabled'
AND (length(trim(so.raw_memory)) > 0 OR length(trim(so.rollout_summary)) > 0)
AND (
(so.last_usage IS NOT NULL AND so.last_usage >= ?)
OR (so.last_usage IS NULL AND so.source_updated_at >= ?)
)
ORDER BY
COALESCE(so.usage_count, 0) DESC,
COALESCE(so.last_usage, so.source_updated_at) DESC,
so.source_updated_at DESC,
so.thread_id DESC
LIMIT ?
) AS selected
ORDER BY selected.thread_id ASC
"#,
)
.bind(cutoff)
@@ -1260,6 +1274,10 @@ mod tests {
use std::sync::Arc;
use uuid::Uuid;
fn stable_thread_id(value: &str) -> ThreadId {
ThreadId::from_string(value).expect("thread id")
}
#[tokio::test]
async fn stage1_claim_skips_when_up_to_date() {
let codex_home = unique_temp_dir();
@@ -2829,9 +2847,9 @@ VALUES (?, ?, ?, ?, ?)
.await
.expect("initialize runtime");
let thread_id_a = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id");
let thread_id_b = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id");
let thread_id_c = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id");
let thread_id_a = stable_thread_id("00000000-0000-4000-8000-000000000001");
let thread_id_b = stable_thread_id("00000000-0000-4000-8000-000000000002");
let thread_id_c = stable_thread_id("00000000-0000-4000-8000-000000000003");
let owner = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner id");
for (thread_id, workspace) in [
@@ -2918,12 +2936,21 @@ VALUES (?, ?, ?, ?, ?)
.expect("load phase2 input selection");
assert_eq!(selection.len(), 2);
assert_eq!(selection[0].thread_id, thread_id_c);
assert_eq!(
selection[0].rollout_path,
selection
.iter()
.map(|output| output.thread_id)
.collect::<Vec<_>>(),
vec![thread_id_b, thread_id_c]
);
let selected_c = selection
.iter()
.find(|output| output.thread_id == thread_id_c)
.expect("thread c should be selected");
assert_eq!(
selected_c.rollout_path,
codex_home.join(format!("rollout-{thread_id_c}.jsonl"))
);
assert_eq!(selection[1].thread_id, thread_id_b);
let _ = tokio::fs::remove_dir_all(codex_home).await;
}
@@ -3235,10 +3262,10 @@ VALUES (?, ?, ?, ?, ?)
.await
.expect("initialize runtime");
let thread_id_a = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread a");
let thread_id_b = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread b");
let thread_id_c = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread c");
let thread_id_d = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread d");
let thread_id_a = stable_thread_id("00000000-0000-4000-8000-000000000001");
let thread_id_b = stable_thread_id("00000000-0000-4000-8000-000000000002");
let thread_id_c = stable_thread_id("00000000-0000-4000-8000-000000000003");
let thread_id_d = stable_thread_id("00000000-0000-4000-8000-000000000004");
let owner = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner id");
for (thread_id, workspace) in [
@@ -3365,7 +3392,7 @@ VALUES (?, ?, ?, ?, ?)
.iter()
.map(|output| output.thread_id)
.collect::<Vec<_>>(),
vec![thread_id_d, thread_id_c]
vec![thread_id_c, thread_id_d]
);
let _ = tokio::fs::remove_dir_all(codex_home).await;
@@ -3768,9 +3795,9 @@ VALUES (?, ?, ?, ?, ?)
let now = Utc::now();
let owner = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner id");
let thread_a = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id a");
let thread_b = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id b");
let thread_c = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id c");
let thread_a = stable_thread_id("00000000-0000-4000-8000-000000000001");
let thread_b = stable_thread_id("00000000-0000-4000-8000-000000000002");
let thread_c = stable_thread_id("00000000-0000-4000-8000-000000000003");
for (thread_id, workspace) in [
(thread_a, "workspace-a"),
@@ -3840,7 +3867,7 @@ VALUES (?, ?, ?, ?, ?)
}
let selection = runtime
.get_phase2_input_selection(/*n*/ 3, /*max_unused_days*/ 30)
.get_phase2_input_selection(/*n*/ 1, /*max_unused_days*/ 30)
.await
.expect("load phase2 input selection");
@@ -3849,7 +3876,7 @@ VALUES (?, ?, ?, ?, ?)
.iter()
.map(|output| output.thread_id)
.collect::<Vec<_>>(),
vec![thread_b, thread_a, thread_c]
vec![thread_b]
);
let _ = tokio::fs::remove_dir_all(codex_home).await;
@@ -3864,9 +3891,9 @@ VALUES (?, ?, ?, ?, ?)
let now = Utc::now();
let owner = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner id");
let thread_a = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id a");
let thread_b = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id b");
let thread_c = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id c");
let thread_a = stable_thread_id("00000000-0000-4000-8000-000000000001");
let thread_b = stable_thread_id("00000000-0000-4000-8000-000000000002");
let thread_c = stable_thread_id("00000000-0000-4000-8000-000000000003");
for (thread_id, workspace) in [
(thread_a, "workspace-a"),
@@ -3945,7 +3972,7 @@ VALUES (?, ?, ?, ?, ?)
.iter()
.map(|output| output.thread_id)
.collect::<Vec<_>>(),
vec![thread_c, thread_b]
vec![thread_b, thread_c]
);
let _ = tokio::fs::remove_dir_all(codex_home).await;