test: run app-server integration tests under Wine (#29788)

## Why Made a mistake when carving #29746 out of my local changes and the test was missing from the build graph. Oops! ## What Enable the app-server Wine exec test target. Remove the `manual` tag from generated Wine-exec test variants so wildcard Bazel test invocations select them. Refactor the smoke test to ensure it passes with current Windows support.
2026-07-01 00:31:56 +08:00 · 2026-06-23 22:23:29 -07:00
parent e639e8c4bd
commit b17f30eb2a
8 changed files with 64 additions and 52 deletions
@@ -11,6 +11,7 @@ codex_rust_crate(
        "//codex-rs/rmcp-client:test_stdio_server",
    ],
    integration_test_timeout = "long",
+    run_tests_with_wine_exec = True,
    test_shard_counts = {
        # Note app-server-all-test has a large number of integration tests, so
        # even a single shard can be quite slow. When there is a legitimate
@@ -1,19 +1,15 @@
+use anyhow::Context;
 use anyhow::Result;
 use app_test_support::TestAppServer;
-use app_test_support::create_final_assistant_message_sse_response;
-use app_test_support::create_mock_responses_server_sequence;
-use app_test_support::create_shell_command_sse_response;
 use app_test_support::to_response;
 use app_test_support::write_mock_responses_config_toml;
-use codex_app_server_protocol::CommandExecutionStatus;
-use codex_app_server_protocol::ItemCompletedNotification;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
-use codex_app_server_protocol::ThreadItem;
 use codex_app_server_protocol::ThreadStartParams;
 use codex_app_server_protocol::ThreadStartResponse;
 use codex_app_server_protocol::TurnStartParams;
 use codex_app_server_protocol::UserInput as V2UserInput;
+use core_test_support::responses;
 use pretty_assertions::assert_eq;
 use std::collections::BTreeMap;
 use std::time::Duration;
@@ -23,17 +19,17 @@ use tokio::time::timeout;
 const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);

 #[tokio::test]
-async fn thread_start_with_auto_env_uses_fixture_cwd() -> Result<()> {
-    let responses = vec![
-        create_shell_command_sse_response(
-            vec!["echo".to_string(), "auto-env-ok".to_string()],
-            /*workdir*/ None,
-            /*timeout_ms*/ None,
-            "cwd-call",
-        )?,
-        create_final_assistant_message_sse_response("done")?,
-    ];
-    let server = create_mock_responses_server_sequence(responses).await;
+async fn thread_start_with_auto_env_exposes_fixture_cwd_to_model() -> Result<()> {
+    let server = responses::start_mock_server().await;
+    let response_mock = responses::mount_sse_once(
+        &server,
+        responses::sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_assistant_message("msg-1", "done"),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
    let codex_home = TempDir::new()?;
    write_mock_responses_config_toml(
        codex_home.path(),
@@ -87,46 +83,25 @@ async fn thread_start_with_auto_env_uses_fixture_cwd() -> Result<()> {
    )
    .await??;

-    let command = timeout(DEFAULT_READ_TIMEOUT, async {
-        loop {
-            let notification = mcp
-                .read_stream_until_notification_message("item/completed")
-                .await?;
-            let completed: ItemCompletedNotification = serde_json::from_value(
-                notification
-                    .params
-                    .expect("item/completed params must be present"),
-            )?;
-            if let ThreadItem::CommandExecution { .. } = completed.item {
-                return Ok::<ThreadItem, anyhow::Error>(completed.item);
-            }
-        }
-    })
-    .await??;
-    let ThreadItem::CommandExecution {
-        cwd,
-        status,
-        exit_code,
-        ..
-    } = command
-    else {
-        unreachable!("loop returns only command execution items");
-    };
-    assert_eq!(
-        (cwd, status, exit_code),
-        (
-            expected_environment.cwd,
-            CommandExecutionStatus::Completed,
-            Some(0)
-        )
-    );
-
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("turn/completed"),
    )
    .await??;

+    let environment_context = response_mock
+        .single_request()
+        .message_input_texts("user")
+        .into_iter()
+        .find(|text| text.starts_with("<environment_context>"))
+        .context("environment context should be model visible")?;
+    let model_cwd = environment_context
+        .lines()
+        .find(|line| line.trim_start().starts_with("<cwd>"))
+        .map(str::trim);
+    let expected_cwd = format!("<cwd>{}</cwd>", expected_environment.cwd);
+    assert_eq!(model_cwd, Some(expected_cwd.as_str()));
+
    Ok(())
 }

@@ -648,6 +648,8 @@ async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()> {
+    // TODO(anp): Remove after apply_patch path handling supports target-native Windows paths.
+    skip_if_target_windows!(Ok(()), "asserts POSIX path traversal behavior");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@@ -976,6 +978,8 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() -> Result<()> {
+    // TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
+    skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc and cd command");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1258,6 +1262,8 @@ async fn apply_patch_custom_tool_streaming_emits_updated_changes() -> Result<()>

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<()> {
+    // TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
+    skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc and cd command");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1320,6 +1326,8 @@ async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<(

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nested() -> Result<()> {
+    // TODO(anp): Remove after apply_patch diff fixtures use target-native paths.
+    skip_if_target_windows!(Ok(()), "asserts POSIX repository paths");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|builder| {
@@ -1391,6 +1399,8 @@ async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nest

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() -> Result<()> {
+    // TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
+    skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1448,6 +1458,8 @@ async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() ->

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch() -> Result<()> {
+    // TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
+    skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc");
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@@ -31,6 +31,7 @@ use core_test_support::responses::sse;
 use core_test_support::responses::sse_completed;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
+use core_test_support::skip_if_target_windows;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::local_selections;
 use core_test_support::test_codex::test_codex;
@@ -795,6 +796,8 @@ async fn model_change_from_generated_image_to_text_preserves_prior_generated_ima

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn thread_rollback_after_generated_image_drops_entire_image_turn_history() -> Result<()> {
+    // TODO(anp): Remove after generated-image artifacts use target-native paths.
+    skip_if_target_windows!(Ok(()), "uses host-native generated-image artifact paths");
    skip_if_no_network!(Ok(()));

    let server = MockServer::start().await;
@@ -27,6 +27,7 @@ use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::namespace_child_tool;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_target_windows;
 use core_test_support::test_codex::TestCodex;
 use pretty_assertions::assert_eq;
 use serde_json::Value;
@@ -184,6 +185,9 @@ async fn run_extract_turn(test: &TestCodex, server: &MockServer) -> Result<Respo

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn codex_apps_file_params_upload_environment_files_before_mcp_tool_call() -> Result<()> {
+    // TODO(anp): Remove after file-upload fixtures support target-native Windows paths.
+    skip_if_target_windows!(Ok(()), "uses a host-native file-upload path");
+
    let server = start_mock_server().await;
    let apps_server = AppsTestServer::mount(&server).await?;
    mount_file_upload_mocks(&server, STREAMED_FILE_SIZE as u64).await;
@@ -521,6 +521,8 @@ async fn unified_exec_resolves_relative_workdir() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn unified_exec_respects_workdir_override() -> Result<()> {
+    // TODO(anp): Remove after workdir helpers use target-native paths and commands.
+    skip_if_target_windows!(Ok(()), "uses a POSIX pwd command and workdir path");
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));
    skip_if_host_windows!(Ok(()));
@@ -1277,6 +1279,8 @@ async fn unified_exec_terminal_interaction_captures_delayed_output() -> Result<(

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
+    // TODO(anp): Remove after unified-exec fixtures use target-native commands.
+    skip_if_target_windows!(Ok(()), "uses bash and a POSIX sleep command");
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));
    skip_if_host_windows!(Ok(()));
@@ -571,7 +571,9 @@ def codex_rust_crate(
                test_bin = "//codex-rs/exec-server/testing:wine-exec-test-runner",
                workspace_root_marker = "//codex-rs/utils/cargo-bin:repo_root.marker",
                target_compatible_with = WINE_TEST_TARGET_COMPATIBLE_WITH,
-                tags = test_tags + ["manual"],
+                # This wrapper has no Rust sources and transitions a data
+                # dependency to a Windows toolchain the lint does not register.
+                tags = test_tags + ["no-argument-comment-lint"],
                **wine_test_kwargs
            )

@@ -18,8 +18,19 @@ if [[ "${RUNNER_OS:-}" != "Windows" ]]; then
  manual_rust_test_targets="$(printf '%s\n' "${manual_rust_test_targets}" | grep -v -- '-windows-cross-bin$' || true)"
 fi

+# Convert semantic lint opt-outs into negative target patterns so wildcard
+# builds do not analyze toolchains used only by those wrappers.
+excluded_targets="$(
+  ./.github/scripts/run-bazel-query-ci.sh \
+    --output=label \
+    -- 'attr(tags, "no-argument-comment-lint", //codex-rs/...)'
+)"
+
 # The lint configuration does not register the transitioned Windows toolchain.
 printf '%s\n' \
  "//codex-rs/..." \
  "-//codex-rs/core/tests/remote_env_windows:smoke-test"
+if [[ -n "${excluded_targets}" ]]; then
+  printf '%s\n' "${excluded_targets}" | sed 's/^/-/'
+fi
 printf '%s\n' "${manual_rust_test_targets}"