diff --git a/.bazelrc b/.bazelrc index a068b4484..a5fea4028 100644 --- a/.bazelrc +++ b/.bazelrc @@ -79,6 +79,10 @@ common:ci --disk_cache= # Shared config for the main Bazel CI workflow. common:ci-bazel --config=ci common:ci-bazel --build_metadata=TAG_workflow=bazel +# Bazel CI cross-compiles in several legs, and the V8-backed code-mode tests +# are not stable in that setup yet. Keep running the rest of the Rust +# integration suites through the workspace-root launcher. +common:ci-bazel --test_env=CODEX_BAZEL_TEST_SKIP_FILTERS=suite::code_mode:: # Shared config for Bazel-backed Rust linting. build:clippy --aspects=@rules_rust//rust:defs.bzl%rust_clippy_aspect @@ -164,9 +168,9 @@ common:ci-windows-cross --strategy=remote common:ci-windows-cross --strategy=TestRunner=local common:ci-windows-cross --local_test_jobs=4 common:ci-windows-cross --test_env=RUST_TEST_THREADS=1 -# Native Windows CI still covers these tests. The cross-built gnullvm binaries -# currently crash in V8-backed code-mode tests and hang in PowerShell AST parser -# tests when those binaries are run on the Windows runner. +# Native Windows CI still covers the PowerShell tests. The cross-built gnullvm +# binaries currently hang in PowerShell AST parser tests when those binaries are +# run on the Windows runner. common:ci-windows-cross --test_env=CODEX_BAZEL_TEST_SKIP_FILTERS=suite::code_mode::,powershell common:ci-windows-cross --platforms=//:windows_x86_64_gnullvm common:ci-windows-cross --extra_execution_platforms=//:rbe,//:windows_x86_64_msvc diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index d98bd4f87..aed3f7a1b 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -18,9 +18,9 @@ concurrency: jobs: test: # PRs use a fast Windows cross-compiled test leg for pre-merge signal. - # Post-merge pushes to main also run the native Windows test job below, - # which keeps V8/code-mode coverage without putting PR latency back on the - # critical path. + # Post-merge pushes to main also run the native Windows test job below for + # broader Windows signal without putting PR latency back on the critical + # path. Cargo CI owns V8/code-mode test coverage for now. timeout-minutes: 30 strategy: fail-fast: false @@ -46,8 +46,8 @@ jobs: # Windows fast path: build the windows-gnullvm binaries with Linux # RBE, then run the resulting Windows tests on the Windows runner. - # The main-only native Windows job below preserves full V8/code-mode - # coverage post-merge. + # Cargo CI preserves V8/code-mode coverage while Bazel CI keeps broad + # non-code-mode signal. - os: windows-latest target: x86_64-pc-windows-gnullvm runs-on: ${{ matrix.os }} @@ -88,6 +88,11 @@ jobs: # path. V8 consumers under `//codex-rs/...` still participate # transitively through `//...`. -//third_party/v8:all + # V8-backed code-mode tests are covered by Cargo CI. Bazel CI + # cross-compiles in several legs, and those tests are not stable in + # that setup yet. + -//codex-rs/code-mode:code-mode-unit-tests + -//codex-rs/v8-poc:v8-poc-unit-tests ) bazel_wrapper_args=( @@ -105,15 +110,6 @@ jobs: --windows-cross-compile --remote-download-toplevel ) - # Tradeoff: the Linux-RBE-built windows-gnullvm V8 archive - # currently crashes during direct V8/code-mode smoke tests on the - # Windows runner. Keep the broader fast Windows suite in PR CI and - # rely on the main-only native Windows job below for full - # V8/code-mode signal while we investigate the cross-built archive. - bazel_targets+=( - -//codex-rs/code-mode:code-mode-unit-tests - -//codex-rs/v8-poc:v8-poc-unit-tests - ) fi ./.github/scripts/run-bazel-ci.sh \ @@ -144,9 +140,8 @@ jobs: test-windows-native-main: # Native Windows Bazel tests are slower and frequently approach the - # 30-minute PR budget, but they provide the full V8/code-mode signal that - # the fast cross-compiled PR leg intentionally trades away. Run this only - # for post-merge commits to main and give it a larger timeout. + # 30-minute PR budget. Run this only for post-merge commits to main and give + # it a larger timeout. if: github.event_name == 'push' && github.ref == 'refs/heads/main' timeout-minutes: 40 runs-on: windows-latest @@ -174,6 +169,11 @@ jobs: # path. V8 consumers under `//codex-rs/...` still participate # transitively through `//...`. -//third_party/v8:all + # Keep this aligned with the main Bazel job. The native Windows + # job preserves broad post-merge coverage, but code-mode/V8 tests + # are covered by Cargo CI rather than Bazel for now. + -//codex-rs/code-mode:code-mode-unit-tests + -//codex-rs/v8-poc:v8-poc-unit-tests ) bazel_test_args=( diff --git a/defs.bzl b/defs.bzl index 043dec3c3..fc8b0e0da 100644 --- a/defs.bzl +++ b/defs.bzl @@ -243,10 +243,12 @@ def codex_rust_crate( targets generated from `tests/*.rs`. test_data_extra: Extra runtime data for tests. test_shard_counts: Mapping from generated test target name to Bazel - shard count. Matching tests use native Bazel sharding on the - original test label, while rules_rust assigns each Rust test case - to a stable bucket by hashing the test name. Matching tests are - also marked flaky, which gives them Bazel's default three attempts. + shard count. Matching tests use native Bazel sharding on the outer + workspace-root launcher, not rules_rust's inner sharding wrapper. + The launcher resolves the real Rust test binary through runfiles + and then assigns each libtest case to a stable bucket by hashing + the test name. Matching tests are also marked flaky, which gives + them Bazel's default three attempts. test_tags: Tags applied to unit + integration test targets. Typically used to disable the sandbox, but see https://bazel.build/reference/be/common-definitions#common.tags unit_test_timeout: Optional Bazel timeout for the unit-test target @@ -408,34 +410,91 @@ def codex_rust_crate( test_kwargs.update(integration_test_kwargs) test_shard_count = _test_shard_count(test_shard_counts, test_name) if test_shard_count: - test_kwargs["experimental_enable_sharding"] = True + # Put Bazel sharding on the label users/CI invoke. Do not set + # rules_rust's experimental_enable_sharding on the Rust test + # binary: that creates an intermediate wrapper that expects a + # symlink runfiles tree, while this repo intentionally runs with + # --noenable_runfiles and usually has only a runfiles manifest. test_kwargs["shard_count"] = test_shard_count test_kwargs["flaky"] = True - # Keep the existing integration test shape on non-gnullvm platforms. - # Windows cross tests need workspace_root_test so runfile env vars - # resolve to Windows-native absolute paths before the test starts. - rust_test( - name = test_name, - crate_name = test_crate_name, - crate_root = test, - srcs = [test], - data = native.glob(["tests/**"], allow_empty = True) + sanitized_binaries + test_data_extra, - compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra, - deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra, - # Bazel has emitted both `codex-rs//...` and - # `../codex-rs//...` paths for `file!()`. Strip either - # prefix so Insta records Cargo-like metadata such as `core/tests/...`. - rustc_flags = rustc_flags_extra + WINDOWS_RUSTC_LINK_FLAGS + [ - "--remap-path-prefix=../codex-rs=", - "--remap-path-prefix=codex-rs=", - ], - rustc_env = rustc_env, - env = cargo_env, - target_compatible_with = WINDOWS_GNULLVM_INCOMPATIBLE, - tags = test_tags, - **test_kwargs - ) + integration_test_binary = test_name + "-bin" + + # There are three generated integration-test shapes: + # + # 1. Unsharded native tests keep the plain rust_test label for minimal + # churn and the usual rules_rust Cargo-like environment. + # 2. Sharded native tests split into a manual rust_test binary plus an + # outer workspace_root_test. The outer test action receives Bazel's + # sharding environment, resolves the real binary through the + # runfiles manifest, and implements stable libtest sharding itself. + # 3. Windows cross tests always use the workspace_root_test wrapper so + # runfile env vars become Windows-native absolute paths before the + # Rust process starts. + if test_shard_count: + # This target is intentionally a binary-like helper, not the public + # test target. The wrapper below owns cwd setup, runfile env + # materialization, sharding, and flaky retry behavior. + rust_test( + name = integration_test_binary, + crate_name = test_crate_name, + crate_root = test, + srcs = [test], + data = native.glob(["tests/**"], allow_empty = True) + sanitized_binaries + test_data_extra, + compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra, + deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra, + # Bazel has emitted both `codex-rs//...` and + # `../codex-rs//...` paths for `file!()`. Strip either + # prefix so Insta records Cargo-like metadata such as `core/tests/...`. + rustc_flags = rustc_flags_extra + WINDOWS_RUSTC_LINK_FLAGS + [ + "--remap-path-prefix=../codex-rs=", + "--remap-path-prefix=codex-rs=", + ], + rustc_env = rustc_env, + target_compatible_with = WINDOWS_GNULLVM_INCOMPATIBLE, + tags = test_tags + ["manual"], + ) + + workspace_root_test( + name = test_name, + env = test_env, + # CARGO_BIN_EXE_* values are rlocation paths at analysis time. + # The launcher rewrites them to absolute paths at execution + # time so tests keep working after chdir_workspace_root and on + # manifest-only platforms. + runfile_env = cargo_env_runfiles, + test_bin = ":" + integration_test_binary, + workspace_root_marker = "//codex-rs/utils/cargo-bin:repo_root.marker", + target_compatible_with = WINDOWS_GNULLVM_INCOMPATIBLE, + tags = test_tags, + **test_kwargs + ) + else: + # For unsharded tests, the direct rust_test rule is still fine: + # there is no rules_rust sharding wrapper to bypass, and env can + # use rlocation paths directly because the test starts under + # Bazel's normal test environment. + rust_test( + name = test_name, + crate_name = test_crate_name, + crate_root = test, + srcs = [test], + data = native.glob(["tests/**"], allow_empty = True) + sanitized_binaries + test_data_extra, + compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra, + deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra, + # Bazel has emitted both `codex-rs//...` and + # `../codex-rs//...` paths for `file!()`. Strip either + # prefix so Insta records Cargo-like metadata such as `core/tests/...`. + rustc_flags = rustc_flags_extra + WINDOWS_RUSTC_LINK_FLAGS + [ + "--remap-path-prefix=../codex-rs=", + "--remap-path-prefix=codex-rs=", + ], + rustc_env = rustc_env, + env = cargo_env, + target_compatible_with = WINDOWS_GNULLVM_INCOMPATIBLE, + tags = test_tags, + **test_kwargs + ) windows_cross_test_kwargs = {} windows_cross_test_kwargs.update(integration_test_kwargs) diff --git a/scripts/list-bazel-clippy-targets.sh b/scripts/list-bazel-clippy-targets.sh index 8cfa60ba1..b76fc2a83 100755 --- a/scripts/list-bazel-clippy-targets.sh +++ b/scripts/list-bazel-clippy-targets.sh @@ -38,7 +38,17 @@ else )" fi if [[ "${RUNNER_OS:-}" != "Windows" ]]; then + # Non-Windows clippy jobs lint the native test binaries; the + # Windows-cross binaries exist only for the fast Windows test leg. manual_rust_test_targets="$(printf '%s\n' "${manual_rust_test_targets}" | grep -v -- '-windows-cross-bin$' || true)" +elif [[ $windows_cross_compile -eq 1 ]]; then + # `bazel query` is intentionally pre-analysis and does not remove targets + # made incompatible by `target_compatible_with`. Sharded integration tests + # add native-only manual helpers such as `core-all-test-bin`, plus separate + # `core-all-test-windows-cross-bin` helpers for the Windows cross leg. Keep + # the Windows helpers and unit-test helpers, but do not pass the native-only + # sharded integration helpers as explicit clippy targets. + manual_rust_test_targets="$(printf '%s\n' "${manual_rust_test_targets}" | grep -v -- '-test-bin$' || true)" fi printf '%s\n' \