From 3c03bb4f182c91b07c12bcd4d79ac61df10dd084 Mon Sep 17 00:00:00 2001 From: jif Date: Fri, 26 Jun 2026 11:02:27 +0100 Subject: [PATCH] Test selected capabilities across unavailable resume (#30215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why The selected-capability integration test already covers initial attachment and cold resume, but it resumes while the selected executor is still reachable. That leaves an important World State transition untested: a thread remembers its selected capability root, resumes while that environment is unavailable, and later sees the same stable environment return. ## What this tests This extends the existing end-to-end scenario: ```text selected executor available ↓ app-server stops and the executor goes away ↓ thread resumes with the executor unavailable ↓ skills, selected MCP tools, and connector attribution are absent ↓ the same environment ID is attached again ↓ skills, MCP tools, and connector attribution return ``` The test also checks that the unavailable snapshot explicitly tells the model that no selected-environment skills are currently available. After reattachment, it invokes the selected skill again and verifies that a new executor-owned MCP process starts. ## Scope This is test-only. It keeps the existing assumption that an environment ID refers to stable capability contents. It does not add package-file invalidation or live transport reconnect behavior. --- .../suite/v2/selected_capability_stack.rs | 60 +++++++++++++++---- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/codex-rs/app-server/tests/suite/v2/selected_capability_stack.rs b/codex-rs/app-server/tests/suite/v2/selected_capability_stack.rs index 052f286ff..6a70116ae 100644 --- a/codex-rs/app-server/tests/suite/v2/selected_capability_stack.rs +++ b/codex-rs/app-server/tests/suite/v2/selected_capability_stack.rs @@ -57,6 +57,7 @@ const SKILL_NAME: &str = "executor-demo:deploy"; const SKILL_DESCRIPTION: &str = "Deploy through the selected executor."; const SKILL_BODY_MARKER: &str = "SELECTED_EXECUTOR_SKILL_BODY"; const LOCAL_SKILL_BODY_MARKER: &str = "COLLIDING_LOCAL_SKILL_BODY"; +const NO_SELECTED_SKILLS_MESSAGE: &str = "No selected-environment skills are currently available."; const MCP_SERVER_NAME: &str = "executor_probe"; const MCP_CALL_ID: &str = "selected-executor-mcp-call"; const CONNECTOR_ID: &str = "calendar"; @@ -122,9 +123,17 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume() responses::ev_completed("unchanged-step"), ]), responses::sse(vec![ - responses::ev_response_created("resumed-step"), - responses::ev_assistant_message("resumed-message", "Ready after resume"), - responses::ev_completed("resumed-step"), + responses::ev_response_created("resumed-unavailable-step"), + responses::ev_assistant_message( + "resumed-unavailable-message", + "Unavailable after resume", + ), + responses::ev_completed("resumed-unavailable-step"), + ]), + responses::sse(vec![ + responses::ev_response_created("reattached-step"), + responses::ev_assistant_message("reattached-message", "Ready after reattach"), + responses::ev_completed("reattached-step"), ]), ], ) @@ -172,6 +181,7 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume() .await?; assert_eq!(first_mcp_pid, wait_for_pid_file(&fixture.pid_file).await?); + exec_server.kill().await?; drop(app_server); std::fs::remove_file(&fixture.pid_file)?; @@ -190,12 +200,30 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume() .await??; let ThreadResumeResponse { thread, .. } = to_response(response)?; assert_eq!(thread_id, thread.id); + + run_turn( + &mut app_server, + &thread_id, + "Inspect capabilities while the selected executor is unavailable", + fixture.environment_cwd.clone(), + ) + .await?; + let requests = response_mock.requests(); + assert_eq!(5, requests.len()); + assert_selected_plugin_tools_absent(&requests[4]); + assert!( + latest_selected_skill_update(&requests[4]) + .is_some_and(|text| text.contains(NO_SELECTED_SKILLS_MESSAGE)) + ); + + exec_server = spawn_exec_server(fixture.codex_home.path(), &fixture.exec_server_url).await?; + add_environment(&mut app_server, &fixture.exec_server_url).await?; wait_for_selected_mcp_server(&mut app_server, &thread_id).await?; run_turn( &mut app_server, &thread_id, - &format!("Use ${SKILL_NAME} after resuming the thread"), + &format!("Use ${SKILL_NAME} after reattaching the selected executor"), fixture.environment_cwd, ) .await?; @@ -203,13 +231,13 @@ async fn selected_capability_stack_tracks_environment_availability_and_resume() assert_ne!(first_mcp_pid, resumed_mcp_pid); let requests = response_mock.requests(); - assert_eq!(5, requests.len()); + assert_eq!(6, requests.len()); for request in &requests[1..4] { assert_selected_skill_is_injected(request, /*expected_count*/ 1); assert_selected_plugin_tools(request); } - assert_selected_skill_is_injected(&requests[4], /*expected_count*/ 2); - assert_selected_plugin_tools(&requests[4]); + assert_selected_skill_is_injected(&requests[5], /*expected_count*/ 2); + assert_selected_plugin_tools(&requests[5]); let output = requests[2].function_call_output(MCP_CALL_ID); let output = output["output"] .as_str() @@ -508,6 +536,10 @@ fn assert_selected_capabilities_absent(request: &ResponsesRequest) { .into_iter() .all(|text| !text.contains(SKILL_DESCRIPTION)) ); + assert_selected_plugin_tools_absent(request); +} + +fn assert_selected_plugin_tools_absent(request: &ResponsesRequest) { assert!( request .tool_by_name(&format!("mcp__{MCP_SERVER_NAME}"), "echo") @@ -540,13 +572,17 @@ fn assert_selected_skill_is_injected(request: &ResponsesRequest, expected_count: } fn assert_selected_skill_catalog_available(request: &ResponsesRequest) { - let catalog_fragments = request + let catalog_fragment = latest_selected_skill_update(request) + .expect("selected skill catalog update should be model-visible"); + assert!(catalog_fragment.contains(SKILL_DESCRIPTION)); + assert!(catalog_fragment.contains("environment resource:")); +} + +fn latest_selected_skill_update(request: &ResponsesRequest) -> Option { + request .message_input_texts("developer") .into_iter() - .filter(|text| text.contains(SKILL_DESCRIPTION)) - .collect::>(); - assert_eq!(1, catalog_fragments.len()); - assert!(catalog_fragments[0].contains("environment resource:")); + .rfind(|text| text.contains(SKILL_DESCRIPTION) || text.contains(NO_SELECTED_SKILLS_MESSAGE)) } fn assert_selected_plugin_tools(request: &ResponsesRequest) {