Test selected capabilities across unavailable resume (#30215)

## Why The selected-capability integration test already covers initial attachment and cold resume, but it resumes while the selected executor is still reachable. That leaves an important World State transition untested: a thread remembers its selected capability root, resumes while that environment is unavailable, and later sees the same stable environment return. ## What this tests This extends the existing end-to-end scenario: ```text selected executor available ↓ app-server stops and the executor goes away ↓ thread resumes with the executor unavailable ↓ skills, selected MCP tools, and connector attribution are absent ↓ the same environment ID is attached again ↓ skills, MCP tools, and connector attribution return ``` The test also checks that the unavailable snapshot explicitly tells the model that no selected-environment skills are currently available. After reattachment, it invokes the selected skill again and verifies that a new executor-owned MCP process starts. ## Scope This is test-only. It keeps the existing assumption that an environment ID refers to stable capability contents. It does not add package-file invalidation or live transport reconnect behavior.
2026-07-01 00:31:56 +08:00 · 2026-06-26 11:02:27 +01:00
parent 6d2168f06a
commit 3c03bb4f18
1 changed files with 48 additions and 12 deletions
@@ -57,6 +57,7 @@ const SKILL_NAME: &str = "executor-demo:deploy";
 const SKILL_DESCRIPTION: &str = "Deploy through the selected executor.";
 const SKILL_BODY_MARKER: &str = "SELECTED_EXECUTOR_SKILL_BODY";
 const LOCAL_SKILL_BODY_MARKER: &str = "COLLIDING_LOCAL_SKILL_BODY";
+const NO_SELECTED_SKILLS_MESSAGE: &str = "No selected-environment skills are currently available.";
 const MCP_SERVER_NAME: &str = "executor_probe";
 const MCP_CALL_ID: &str = "selected-executor-mcp-call";
 const CONNECTOR_ID: &str = "calendar";
@@ -122,9 +123,17 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
                responses::ev_completed("unchanged-step"),
            ]),
            responses::sse(vec![
-                responses::ev_response_created("resumed-step"),
-                responses::ev_assistant_message("resumed-message", "Ready after resume"),
-                responses::ev_completed("resumed-step"),
+                responses::ev_response_created("resumed-unavailable-step"),
+                responses::ev_assistant_message(
+                    "resumed-unavailable-message",
+                    "Unavailable after resume",
+                ),
+                responses::ev_completed("resumed-unavailable-step"),
+            ]),
+            responses::sse(vec![
+                responses::ev_response_created("reattached-step"),
+                responses::ev_assistant_message("reattached-message", "Ready after reattach"),
+                responses::ev_completed("reattached-step"),
            ]),
        ],
    )
@@ -172,6 +181,7 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
    .await?;
    assert_eq!(first_mcp_pid, wait_for_pid_file(&fixture.pid_file).await?);

+    exec_server.kill().await?;
    drop(app_server);
    std::fs::remove_file(&fixture.pid_file)?;

@@ -190,12 +200,30 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
    .await??;
    let ThreadResumeResponse { thread, .. } = to_response(response)?;
    assert_eq!(thread_id, thread.id);
+
+    run_turn(
+        &mut app_server,
+        &thread_id,
+        "Inspect capabilities while the selected executor is unavailable",
+        fixture.environment_cwd.clone(),
+    )
+    .await?;
+    let requests = response_mock.requests();
+    assert_eq!(5, requests.len());
+    assert_selected_plugin_tools_absent(&requests[4]);
+    assert!(
+        latest_selected_skill_update(&requests[4])
+            .is_some_and(|text| text.contains(NO_SELECTED_SKILLS_MESSAGE))
+    );
+
+    exec_server = spawn_exec_server(fixture.codex_home.path(), &fixture.exec_server_url).await?;
+    add_environment(&mut app_server, &fixture.exec_server_url).await?;
    wait_for_selected_mcp_server(&mut app_server, &thread_id).await?;

    run_turn(
        &mut app_server,
        &thread_id,
-        &format!("Use ${SKILL_NAME} after resuming the thread"),
+        &format!("Use ${SKILL_NAME} after reattaching the selected executor"),
        fixture.environment_cwd,
    )
    .await?;
@@ -203,13 +231,13 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume()
    assert_ne!(first_mcp_pid, resumed_mcp_pid);

    let requests = response_mock.requests();
-    assert_eq!(5, requests.len());
+    assert_eq!(6, requests.len());
    for request in &requests[1..4] {
        assert_selected_skill_is_injected(request, /*expected_count*/ 1);
        assert_selected_plugin_tools(request);
    }
-    assert_selected_skill_is_injected(&requests[4], /*expected_count*/ 2);
-    assert_selected_plugin_tools(&requests[4]);
+    assert_selected_skill_is_injected(&requests[5], /*expected_count*/ 2);
+    assert_selected_plugin_tools(&requests[5]);
    let output = requests[2].function_call_output(MCP_CALL_ID);
    let output = output["output"]
        .as_str()
@@ -508,6 +536,10 @@ fn assert_selected_capabilities_absent(request: &ResponsesRequest) {
            .into_iter()
            .all(|text| !text.contains(SKILL_DESCRIPTION))
    );
+    assert_selected_plugin_tools_absent(request);
+}
+
+fn assert_selected_plugin_tools_absent(request: &ResponsesRequest) {
    assert!(
        request
            .tool_by_name(&format!("mcp__{MCP_SERVER_NAME}"), "echo")
@@ -540,13 +572,17 @@ fn assert_selected_skill_is_injected(request: &ResponsesRequest, expected_count:
 }

 fn assert_selected_skill_catalog_available(request: &ResponsesRequest) {
-    let catalog_fragments = request
+    let catalog_fragment = latest_selected_skill_update(request)
+        .expect("selected skill catalog update should be model-visible");
+    assert!(catalog_fragment.contains(SKILL_DESCRIPTION));
+    assert!(catalog_fragment.contains("environment resource:"));
+}
+
+fn latest_selected_skill_update(request: &ResponsesRequest) -> Option<String> {
+    request
        .message_input_texts("developer")
        .into_iter()
-        .filter(|text| text.contains(SKILL_DESCRIPTION))
-        .collect::<Vec<_>>();
-    assert_eq!(1, catalog_fragments.len());
-    assert!(catalog_fragments[0].contains("environment resource:"));
+        .rfind(|text| text.contains(SKILL_DESCRIPTION) || text.contains(NO_SELECTED_SKILLS_MESSAGE))
 }

 fn assert_selected_plugin_tools(request: &ResponsesRequest) {