test: run app-server integration tests under Wine (#29788)

## Why

I made a mistake when carving #29746 out of my local changes: the
app-server Wine exec test was left out of the build graph. Oops!

## What

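Enable the app-server Wine exec test target. Remove the `manual` tag
from generated Wine-exec test variants so wildcard Bazel test
invocations select them, and tag them `no-argument-comment-lint`
instead so that the lint target list can exclude them via negative
target patterns. Refactor the smoke test to ensure it passes with
current Windows support.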
This commit is contained in:
Adam Perry @ OpenAI
2026-06-23 22:23:29 -07:00
committed by GitHub
Unverified
parent e639e8c4bd
commit b17f30eb2a
8 changed files with 64 additions and 52 deletions
+1
View File
@@ -11,6 +11,7 @@ codex_rust_crate(
"//codex-rs/rmcp-client:test_stdio_server",
],
integration_test_timeout = "long",
run_tests_with_wine_exec = True,
test_shard_counts = {
# Note app-server-all-test has a large number of integration tests, so
# even a single shard can be quite slow. When there is a legitimate
+26 -51
View File
@@ -1,19 +1,15 @@
use anyhow::Context;
use anyhow::Result;
use app_test_support::TestAppServer;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use app_test_support::write_mock_responses_config_toml;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::UserInput as V2UserInput;
use core_test_support::responses;
use pretty_assertions::assert_eq;
use std::collections::BTreeMap;
use std::time::Duration;
@@ -23,17 +19,17 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);
#[tokio::test]
async fn thread_start_with_auto_env_uses_fixture_cwd() -> Result<()> {
let responses = vec![
create_shell_command_sse_response(
vec!["echo".to_string(), "auto-env-ok".to_string()],
/*workdir*/ None,
/*timeout_ms*/ None,
"cwd-call",
)?,
create_final_assistant_message_sse_response("done")?,
];
let server = create_mock_responses_server_sequence(responses).await;
async fn thread_start_with_auto_env_exposes_fixture_cwd_to_model() -> Result<()> {
let server = responses::start_mock_server().await;
let response_mock = responses::mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-1"),
]),
)
.await;
let codex_home = TempDir::new()?;
write_mock_responses_config_toml(
codex_home.path(),
@@ -87,46 +83,25 @@ async fn thread_start_with_auto_env_uses_fixture_cwd() -> Result<()> {
)
.await??;
let command = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let notification = mcp
.read_stream_until_notification_message("item/completed")
.await?;
let completed: ItemCompletedNotification = serde_json::from_value(
notification
.params
.expect("item/completed params must be present"),
)?;
if let ThreadItem::CommandExecution { .. } = completed.item {
return Ok::<ThreadItem, anyhow::Error>(completed.item);
}
}
})
.await??;
let ThreadItem::CommandExecution {
cwd,
status,
exit_code,
..
} = command
else {
unreachable!("loop returns only command execution items");
};
assert_eq!(
(cwd, status, exit_code),
(
expected_environment.cwd,
CommandExecutionStatus::Completed,
Some(0)
)
);
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
let environment_context = response_mock
.single_request()
.message_input_texts("user")
.into_iter()
.find(|text| text.starts_with("<environment_context>"))
.context("environment context should be model visible")?;
let model_cwd = environment_context
.lines()
.find(|line| line.trim_start().starts_with("<cwd>"))
.map(str::trim);
let expected_cwd = format!("<cwd>{}</cwd>", expected_environment.cwd);
assert_eq!(model_cwd, Some(expected_cwd.as_str()));
Ok(())
}
@@ -648,6 +648,8 @@ async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()> {
// TODO(anp): Remove after apply_patch path handling supports target-native Windows paths.
skip_if_target_windows!(Ok(()), "asserts POSIX path traversal behavior");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -976,6 +978,8 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() -> Result<()> {
// TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc and cd command");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1258,6 +1262,8 @@ async fn apply_patch_custom_tool_streaming_emits_updated_changes() -> Result<()>
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<()> {
// TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc and cd command");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1320,6 +1326,8 @@ async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<(
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nested() -> Result<()> {
// TODO(anp): Remove after apply_patch diff fixtures use target-native paths.
skip_if_target_windows!(Ok(()), "asserts POSIX repository paths");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| {
@@ -1391,6 +1399,8 @@ async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nest
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() -> Result<()> {
// TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -1448,6 +1458,8 @@ async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() ->
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch() -> Result<()> {
// TODO(anp): Remove after apply_patch shell fixtures use target-native commands.
skip_if_target_windows!(Ok(()), "uses a POSIX shell heredoc");
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -31,6 +31,7 @@ use core_test_support::responses::sse;
use core_test_support::responses::sse_completed;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_target_windows;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::local_selections;
use core_test_support::test_codex::test_codex;
@@ -795,6 +796,8 @@ async fn model_change_from_generated_image_to_text_preserves_prior_generated_ima
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn thread_rollback_after_generated_image_drops_entire_image_turn_history() -> Result<()> {
// TODO(anp): Remove after generated-image artifacts use target-native paths.
skip_if_target_windows!(Ok(()), "uses host-native generated-image artifact paths");
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
@@ -27,6 +27,7 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::namespace_child_tool;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_target_windows;
use core_test_support::test_codex::TestCodex;
use pretty_assertions::assert_eq;
use serde_json::Value;
@@ -184,6 +185,9 @@ async fn run_extract_turn(test: &TestCodex, server: &MockServer) -> Result<Respo
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn codex_apps_file_params_upload_environment_files_before_mcp_tool_call() -> Result<()> {
// TODO(anp): Remove after file-upload fixtures support target-native Windows paths.
skip_if_target_windows!(Ok(()), "uses a host-native file-upload path");
let server = start_mock_server().await;
let apps_server = AppsTestServer::mount(&server).await?;
mount_file_upload_mocks(&server, STREAMED_FILE_SIZE as u64).await;
@@ -521,6 +521,8 @@ async fn unified_exec_resolves_relative_workdir() -> Result<()> {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_respects_workdir_override() -> Result<()> {
// TODO(anp): Remove after workdir helpers use target-native paths and commands.
skip_if_target_windows!(Ok(()), "uses a POSIX pwd command and workdir path");
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_host_windows!(Ok(()));
@@ -1277,6 +1279,8 @@ async fn unified_exec_terminal_interaction_captures_delayed_output() -> Result<(
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
// TODO(anp): Remove after unified-exec fixtures use target-native commands.
skip_if_target_windows!(Ok(()), "uses bash and a POSIX sleep command");
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_host_windows!(Ok(()));
+3 -1
View File
@@ -571,7 +571,9 @@ def codex_rust_crate(
test_bin = "//codex-rs/exec-server/testing:wine-exec-test-runner",
workspace_root_marker = "//codex-rs/utils/cargo-bin:repo_root.marker",
target_compatible_with = WINE_TEST_TARGET_COMPATIBLE_WITH,
tags = test_tags + ["manual"],
# This wrapper has no Rust sources and transitions a data
# dependency to a Windows toolchain the lint does not register.
tags = test_tags + ["no-argument-comment-lint"],
**wine_test_kwargs
)
@@ -18,8 +18,19 @@ if [[ "${RUNNER_OS:-}" != "Windows" ]]; then
manual_rust_test_targets="$(printf '%s\n' "${manual_rust_test_targets}" | grep -v -- '-windows-cross-bin$' || true)"
fi
# Convert semantic lint opt-outs into negative target patterns so wildcard
# builds do not analyze toolchains used only by those wrappers.
excluded_targets="$(
./.github/scripts/run-bazel-query-ci.sh \
--output=label \
-- 'attr(tags, "no-argument-comment-lint", //codex-rs/...)'
)"
# The lint configuration does not register the transitioned Windows toolchain.
printf '%s\n' \
"//codex-rs/..." \
"-//codex-rs/core/tests/remote_env_windows:smoke-test"
if [[ -n "${excluded_targets}" ]]; then
printf '%s\n' "${excluded_targets}" | sed 's/^/-/'
fi
printf '%s\n' "${manual_rust_test_targets}"