Test selected capabilities across unavailable resume (#30215)

## Why

The selected-capability integration test already covers initial
attachment and cold resume, but it resumes while the selected executor
is still reachable.

That leaves an important World State transition untested: a thread
remembers its selected capability root, resumes while that environment
is unavailable, and later sees the same stable environment return.

## What this tests

This extends the existing end-to-end scenario:

```text
selected executor available
        ↓
app-server stops and the executor goes away
        ↓
thread resumes with the executor unavailable
        ↓
skills, selected MCP tools, and connector attribution are absent
        ↓
the same environment ID is attached again
        ↓
skills, MCP tools, and connector attribution return
```

The test also checks that the unavailable snapshot explicitly tells the
model that no selected-environment skills are currently available. After
reattachment, it invokes the selected skill again and verifies that a
new executor-owned MCP process starts.

## Scope

This is test-only. It keeps the existing assumption that an environment
ID refers to stable capability contents. It does not add package-file
invalidation or live transport reconnect behavior.
This commit is contained in:
jif
2026-06-26 11:02:27 +01:00
committed by GitHub
Unverified
parent 6d2168f06a
commit 3c03bb4f18
@@ -57,6 +57,7 @@ const SKILL_NAME: &str = "executor-demo:deploy";
const SKILL_DESCRIPTION: &str = "Deploy through the selected executor.";
const SKILL_BODY_MARKER: &str = "SELECTED_EXECUTOR_SKILL_BODY";
const LOCAL_SKILL_BODY_MARKER: &str = "COLLIDING_LOCAL_SKILL_BODY";
const NO_SELECTED_SKILLS_MESSAGE: &str = "No selected-environment skills are currently available.";
const MCP_SERVER_NAME: &str = "executor_probe";
const MCP_CALL_ID: &str = "selected-executor-mcp-call";
const CONNECTOR_ID: &str = "calendar";
@@ -122,9 +123,17 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
responses::ev_completed("unchanged-step"),
]),
responses::sse(vec![
responses::ev_response_created("resumed-step"),
responses::ev_assistant_message("resumed-message", "Ready after resume"),
responses::ev_completed("resumed-step"),
responses::ev_response_created("resumed-unavailable-step"),
responses::ev_assistant_message(
"resumed-unavailable-message",
"Unavailable after resume",
),
responses::ev_completed("resumed-unavailable-step"),
]),
responses::sse(vec![
responses::ev_response_created("reattached-step"),
responses::ev_assistant_message("reattached-message", "Ready after reattach"),
responses::ev_completed("reattached-step"),
]),
],
)
@@ -172,6 +181,7 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
.await?;
assert_eq!(first_mcp_pid, wait_for_pid_file(&fixture.pid_file).await?);
exec_server.kill().await?;
drop(app_server);
std::fs::remove_file(&fixture.pid_file)?;
@@ -190,12 +200,30 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
.await??;
let ThreadResumeResponse { thread, .. } = to_response(response)?;
assert_eq!(thread_id, thread.id);
run_turn(
&mut app_server,
&thread_id,
"Inspect capabilities while the selected executor is unavailable",
fixture.environment_cwd.clone(),
)
.await?;
let requests = response_mock.requests();
assert_eq!(5, requests.len());
assert_selected_plugin_tools_absent(&requests[4]);
assert!(
latest_selected_skill_update(&requests[4])
.is_some_and(|text| text.contains(NO_SELECTED_SKILLS_MESSAGE))
);
exec_server = spawn_exec_server(fixture.codex_home.path(), &fixture.exec_server_url).await?;
add_environment(&mut app_server, &fixture.exec_server_url).await?;
wait_for_selected_mcp_server(&mut app_server, &thread_id).await?;
run_turn(
&mut app_server,
&thread_id,
&format!("Use ${SKILL_NAME} after resuming the thread"),
&format!("Use ${SKILL_NAME} after reattaching the selected executor"),
fixture.environment_cwd,
)
.await?;
@@ -203,13 +231,13 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
assert_ne!(first_mcp_pid, resumed_mcp_pid);
let requests = response_mock.requests();
assert_eq!(5, requests.len());
assert_eq!(6, requests.len());
for request in &requests[1..4] {
assert_selected_skill_is_injected(request, /*expected_count*/ 1);
assert_selected_plugin_tools(request);
}
assert_selected_skill_is_injected(&requests[4], /*expected_count*/ 2);
assert_selected_plugin_tools(&requests[4]);
assert_selected_skill_is_injected(&requests[5], /*expected_count*/ 2);
assert_selected_plugin_tools(&requests[5]);
let output = requests[2].function_call_output(MCP_CALL_ID);
let output = output["output"]
.as_str()
@@ -508,6 +536,10 @@ fn assert_selected_capabilities_absent(request: &ResponsesRequest) {
.into_iter()
.all(|text| !text.contains(SKILL_DESCRIPTION))
);
assert_selected_plugin_tools_absent(request);
}
fn assert_selected_plugin_tools_absent(request: &ResponsesRequest) {
assert!(
request
.tool_by_name(&format!("mcp__{MCP_SERVER_NAME}"), "echo")
@@ -540,13 +572,17 @@ fn assert_selected_skill_is_injected(request: &ResponsesRequest, expected_count:
}
fn assert_selected_skill_catalog_available(request: &ResponsesRequest) {
let catalog_fragments = request
let catalog_fragment = latest_selected_skill_update(request)
.expect("selected skill catalog update should be model-visible");
assert!(catalog_fragment.contains(SKILL_DESCRIPTION));
assert!(catalog_fragment.contains("environment resource:"));
}
fn latest_selected_skill_update(request: &ResponsesRequest) -> Option<String> {
request
.message_input_texts("developer")
.into_iter()
.filter(|text| text.contains(SKILL_DESCRIPTION))
.collect::<Vec<_>>();
assert_eq!(1, catalog_fragments.len());
assert!(catalog_fragments[0].contains("environment resource:"));
.rfind(|text| text.contains(SKILL_DESCRIPTION) || text.contains(NO_SELECTED_SKILLS_MESSAGE))
}
fn assert_selected_plugin_tools(request: &ResponsesRequest) {