mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
Test selected capabilities across unavailable resume (#30215)
## Why
The selected-capability integration test already covers initial
attachment and cold resume, but it resumes while the selected executor
is still reachable.
That leaves an important World State transition untested: a thread
remembers its selected capability root, resumes while that environment
is unavailable, and later sees the same stable environment return.
## What this tests
This extends the existing end-to-end scenario:
```text
selected executor available
↓
app-server stops and the executor goes away
↓
thread resumes with the executor unavailable
↓
skills, selected MCP tools, and connector attribution are absent
↓
the same environment ID is attached again
↓
skills, MCP tools, and connector attribution return
```
The test also checks that the unavailable snapshot explicitly tells the
model that no selected-environment skills are currently available. After
reattachment, it invokes the selected skill again and verifies that a
new executor-owned MCP process starts.
## Scope
This is test-only. It keeps the existing assumption that an environment
ID refers to stable capability contents. It does not add package-file
invalidation or live transport reconnect behavior.
This commit is contained in:
@@ -57,6 +57,7 @@ const SKILL_NAME: &str = "executor-demo:deploy";
|
||||
const SKILL_DESCRIPTION: &str = "Deploy through the selected executor.";
|
||||
const SKILL_BODY_MARKER: &str = "SELECTED_EXECUTOR_SKILL_BODY";
|
||||
const LOCAL_SKILL_BODY_MARKER: &str = "COLLIDING_LOCAL_SKILL_BODY";
|
||||
const NO_SELECTED_SKILLS_MESSAGE: &str = "No selected-environment skills are currently available.";
|
||||
const MCP_SERVER_NAME: &str = "executor_probe";
|
||||
const MCP_CALL_ID: &str = "selected-executor-mcp-call";
|
||||
const CONNECTOR_ID: &str = "calendar";
|
||||
@@ -122,9 +123,17 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
|
||||
responses::ev_completed("unchanged-step"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("resumed-step"),
|
||||
responses::ev_assistant_message("resumed-message", "Ready after resume"),
|
||||
responses::ev_completed("resumed-step"),
|
||||
responses::ev_response_created("resumed-unavailable-step"),
|
||||
responses::ev_assistant_message(
|
||||
"resumed-unavailable-message",
|
||||
"Unavailable after resume",
|
||||
),
|
||||
responses::ev_completed("resumed-unavailable-step"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("reattached-step"),
|
||||
responses::ev_assistant_message("reattached-message", "Ready after reattach"),
|
||||
responses::ev_completed("reattached-step"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
@@ -172,6 +181,7 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
|
||||
.await?;
|
||||
assert_eq!(first_mcp_pid, wait_for_pid_file(&fixture.pid_file).await?);
|
||||
|
||||
exec_server.kill().await?;
|
||||
drop(app_server);
|
||||
std::fs::remove_file(&fixture.pid_file)?;
|
||||
|
||||
@@ -190,12 +200,30 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
|
||||
.await??;
|
||||
let ThreadResumeResponse { thread, .. } = to_response(response)?;
|
||||
assert_eq!(thread_id, thread.id);
|
||||
|
||||
run_turn(
|
||||
&mut app_server,
|
||||
&thread_id,
|
||||
"Inspect capabilities while the selected executor is unavailable",
|
||||
fixture.environment_cwd.clone(),
|
||||
)
|
||||
.await?;
|
||||
let requests = response_mock.requests();
|
||||
assert_eq!(5, requests.len());
|
||||
assert_selected_plugin_tools_absent(&requests[4]);
|
||||
assert!(
|
||||
latest_selected_skill_update(&requests[4])
|
||||
.is_some_and(|text| text.contains(NO_SELECTED_SKILLS_MESSAGE))
|
||||
);
|
||||
|
||||
exec_server = spawn_exec_server(fixture.codex_home.path(), &fixture.exec_server_url).await?;
|
||||
add_environment(&mut app_server, &fixture.exec_server_url).await?;
|
||||
wait_for_selected_mcp_server(&mut app_server, &thread_id).await?;
|
||||
|
||||
run_turn(
|
||||
&mut app_server,
|
||||
&thread_id,
|
||||
&format!("Use ${SKILL_NAME} after resuming the thread"),
|
||||
&format!("Use ${SKILL_NAME} after reattaching the selected executor"),
|
||||
fixture.environment_cwd,
|
||||
)
|
||||
.await?;
|
||||
@@ -203,13 +231,13 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
|
||||
assert_ne!(first_mcp_pid, resumed_mcp_pid);
|
||||
|
||||
let requests = response_mock.requests();
|
||||
assert_eq!(5, requests.len());
|
||||
assert_eq!(6, requests.len());
|
||||
for request in &requests[1..4] {
|
||||
assert_selected_skill_is_injected(request, /*expected_count*/ 1);
|
||||
assert_selected_plugin_tools(request);
|
||||
}
|
||||
assert_selected_skill_is_injected(&requests[4], /*expected_count*/ 2);
|
||||
assert_selected_plugin_tools(&requests[4]);
|
||||
assert_selected_skill_is_injected(&requests[5], /*expected_count*/ 2);
|
||||
assert_selected_plugin_tools(&requests[5]);
|
||||
let output = requests[2].function_call_output(MCP_CALL_ID);
|
||||
let output = output["output"]
|
||||
.as_str()
|
||||
@@ -508,6 +536,10 @@ fn assert_selected_capabilities_absent(request: &ResponsesRequest) {
|
||||
.into_iter()
|
||||
.all(|text| !text.contains(SKILL_DESCRIPTION))
|
||||
);
|
||||
assert_selected_plugin_tools_absent(request);
|
||||
}
|
||||
|
||||
fn assert_selected_plugin_tools_absent(request: &ResponsesRequest) {
|
||||
assert!(
|
||||
request
|
||||
.tool_by_name(&format!("mcp__{MCP_SERVER_NAME}"), "echo")
|
||||
@@ -540,13 +572,17 @@ fn assert_selected_skill_is_injected(request: &ResponsesRequest, expected_count:
|
||||
}
|
||||
|
||||
fn assert_selected_skill_catalog_available(request: &ResponsesRequest) {
|
||||
let catalog_fragments = request
|
||||
let catalog_fragment = latest_selected_skill_update(request)
|
||||
.expect("selected skill catalog update should be model-visible");
|
||||
assert!(catalog_fragment.contains(SKILL_DESCRIPTION));
|
||||
assert!(catalog_fragment.contains("environment resource:"));
|
||||
}
|
||||
|
||||
fn latest_selected_skill_update(request: &ResponsesRequest) -> Option<String> {
|
||||
request
|
||||
.message_input_texts("developer")
|
||||
.into_iter()
|
||||
.filter(|text| text.contains(SKILL_DESCRIPTION))
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(1, catalog_fragments.len());
|
||||
assert!(catalog_fragments[0].contains("environment resource:"));
|
||||
.rfind(|text| text.contains(SKILL_DESCRIPTION) || text.contains(NO_SELECTED_SKILLS_MESSAGE))
|
||||
}
|
||||
|
||||
fn assert_selected_plugin_tools(request: &ResponsesRequest) {
|
||||
|
||||
Reference in New Issue
Block a user