From 536952eeeeb5ee1a67c67224ef7ab24c221d669b Mon Sep 17 00:00:00 2001
From: Michael Bolin
Date: Tue, 21 Apr 2026 18:35:47 -0700
Subject: [PATCH] bazel: run wrapped Rust unit test shards (#18913)

## Why

The `codex-tui` Cargo test suite was catching stale snapshot expectations, but the matching Bazel unit-test target was still green.

The TUI unit target is wrapped by `workspace_root_test` so tests run from the repository root and Insta can resolve Cargo-like snapshot paths. After native Bazel sharding was enabled for that wrapped target, rules_rust also inserted its own sharding wrapper around the Rust test binary.

Those two wrappers did not compose: rules_rust's sharding wrapper expects to run from its own runfiles cwd, while `workspace_root_test` deliberately changes cwd to the repo root before invoking the test. In that configuration, the inner wrapper could fail to enumerate the Rust tests and exit successfully with empty shards, so snapshot regressions were not being exercised by Bazel.

## What Changed

- Stop enabling rules_rust's inner `experimental_enable_sharding` for unit-test binaries created by `codex_rust_crate`.
- Keep the configured `shard_count` on the outer `workspace_root_test` target.
- Add libtest sharding directly to `workspace_root_test_launcher.sh.tpl` and `workspace_root_test_launcher.bat.tpl` after the launcher has resolved the actual test binary and established the intended repository-root cwd.
- Partition tests by a stable FNV-1a hash of each libtest test name, matching the stable-shard behavior we wanted without depending on the inner rules_rust wrapper.
- Preserve ad-hoc local test filters by running the resolved test binary directly when explicit test args are supplied.
- On Windows, run selected libtest names from the shard list in bounded PowerShell batches instead of concatenating every selected test into one `cmd.exe` command line.

This PR is stacked on top of #18912, which contains only the snapshot expectation updates exposed once the Bazel target actually runs the TUI unit tests. It is also the reason #18916 becomes visible: once this wrapper fix makes Bazel execute the affected `codex-core` test, that test needs its own executable-path setup fixed.

## Verification

- `cargo test -p codex-tui`
- `bazel test //codex-rs/tui:tui-unit-tests --test_output=errors`
- `bazel test //codex-rs/tui:all --test_output=errors`
- `bash -n workspace_root_test_launcher.sh.tpl`
- Exercised the Windows PowerShell batching fragment locally with a fake test binary and shard-list file.
---
 defs.bzl                             |  8 ++--
 workspace_root_test_launcher.bat.tpl | 67 ++++++++++++++++++++++++++++
 workspace_root_test_launcher.sh.tpl  | 66 +++++++++++++++++++++++++++
 3 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/defs.bzl b/defs.bzl
index 53114a577..d1e38ea91 100644
--- a/defs.bzl
+++ b/defs.bzl
@@ -255,10 +255,9 @@ def codex_rust_crate(
     unit_test_name = name + "-unit-tests"
     unit_test_binary = name + "-unit-tests-bin"
     unit_test_shard_count = _test_shard_count(test_shard_counts, unit_test_name)
-    unit_test_binary_kwargs = {}
-    if unit_test_shard_count:
-        unit_test_binary_kwargs["experimental_enable_sharding"] = True
-
+    # Shard at the workspace_root_test layer. rules_rust's sharding wrapper
+    # expects to run from its own runfiles cwd, while workspace_root_test
+    # deliberately changes cwd so Insta sees Cargo-like snapshot paths.
     rust_test(
         name = unit_test_binary,
         crate = name,
@@ -277,7 +276,6 @@ def codex_rust_crate(
         rustc_env = rustc_env,
         data = test_data_extra,
         tags = test_tags + ["manual"],
-        **unit_test_binary_kwargs
     )
 
     unit_test_kwargs = {}
diff --git a/workspace_root_test_launcher.bat.tpl b/workspace_root_test_launcher.bat.tpl
index 6b7222cca..af82e5ecf 100644
--- a/workspace_root_test_launcher.bat.tpl
+++ b/workspace_root_test_launcher.bat.tpl
@@ -12,9 +12,76 @@ if errorlevel 1 exit /b 1
 
 set "INSTA_WORKSPACE_ROOT=%workspace_root%"
 cd /d "%workspace_root%" || exit /b 1
+
+set "TOTAL_SHARDS=%RULES_RUST_TEST_TOTAL_SHARDS%"
+if not defined TOTAL_SHARDS set "TOTAL_SHARDS=%TEST_TOTAL_SHARDS%"
+if defined TOTAL_SHARDS if not "%TOTAL_SHARDS%"=="0" (
+  call :run_sharded_libtest %*
+  exit /b !ERRORLEVEL!
+)
+
 "%test_bin%" %*
 exit /b %ERRORLEVEL%
 
+:run_sharded_libtest
+if defined TEST_SHARD_STATUS_FILE if defined TEST_TOTAL_SHARDS if not "%TEST_TOTAL_SHARDS%"=="0" (
+  type nul > "%TEST_SHARD_STATUS_FILE%"
+)
+
+if not "%~1"=="" (
+  "%test_bin%" %*
+  exit /b !ERRORLEVEL!
+)
+
+set "SHARD_INDEX=%RULES_RUST_TEST_SHARD_INDEX%"
+if not defined SHARD_INDEX set "SHARD_INDEX=%TEST_SHARD_INDEX%"
+if not defined SHARD_INDEX (
+  >&2 echo TEST_SHARD_INDEX or RULES_RUST_TEST_SHARD_INDEX must be set when sharding is enabled
+  exit /b 1
+)
+
+set "TEMP_ROOT=%TEST_TMPDIR%"
+if not defined TEMP_ROOT set "TEMP_ROOT=%TEMP%"
+if not defined TEMP_ROOT set "TEMP_ROOT=."
+:CREATE_TEMP_DIR
+set "TEMP_DIR=%TEMP_ROOT%\workspace_root_test_sharding_!RANDOM!_!RANDOM!_!RANDOM!"
+mkdir "!TEMP_DIR!" 2>nul
+if errorlevel 1 goto :CREATE_TEMP_DIR
+set "TEMP_LIST=!TEMP_DIR!\list.txt"
+set "TEMP_SHARD_LIST=!TEMP_DIR!\shard.txt"
+
+"%test_bin%" --list --format terse > "!TEMP_LIST!"
+if errorlevel 1 (
+  rmdir /s /q "!TEMP_DIR!" 2>nul
+  exit /b 1
+)
+
+powershell.exe -NoProfile -ExecutionPolicy Bypass -Command ^
+  "$ErrorActionPreference = 'Stop';" ^
+  "$tests = @(Get-Content -LiteralPath $env:TEMP_LIST | Where-Object { $_.EndsWith(': test') } | ForEach-Object { $_.Substring(0, $_.Length - 6) });" ^
+  "[Array]::Sort($tests, [StringComparer]::Ordinal);" ^
+  "$totalShards = [uint32]$env:TOTAL_SHARDS; $shardIndex = [uint32]$env:SHARD_INDEX;" ^
+  "$fnvPrime = [uint64]16777619; $u32Mask = [uint64]4294967295;" ^
+  "foreach ($test in $tests) { $hash = [uint32]2166136261; foreach ($byte in [Text.Encoding]::UTF8.GetBytes($test)) { $hash = [uint32](([uint64]($hash -bxor $byte) * $fnvPrime) -band $u32Mask) }; if (($hash %% $totalShards) -eq $shardIndex) { $test } }" ^
+  > "!TEMP_SHARD_LIST!"
+if errorlevel 1 (
+  rmdir /s /q "!TEMP_DIR!" 2>nul
+  exit /b 1
+)
+
+powershell.exe -NoProfile -ExecutionPolicy Bypass -Command ^
+  "$ErrorActionPreference = 'Stop';" ^
+  "$testBin = $env:test_bin;" ^
+  "$tests = @(Get-Content -LiteralPath $env:TEMP_SHARD_LIST);" ^
+  "$failed = $false; $limit = 7000; $batch = @(); $batchChars = $testBin.Length + 8;" ^
+  "function Invoke-TestBatch { if ($script:batch.Count -eq 0) { return }; & $script:testBin @script:batch '--exact'; if ($LASTEXITCODE -ne 0) { $script:failed = $true }; $script:batch = @(); $script:batchChars = $script:testBin.Length + 8 }" ^
+  "foreach ($test in $tests) { $argChars = $test.Length + 3; if (($batch.Count -gt 0) -and ($batchChars + $argChars -gt $limit)) { Invoke-TestBatch }; $batch += $test; $batchChars += $argChars }" ^
+  "Invoke-TestBatch; if ($failed) { exit 1 }"
+set "TEST_EXIT=%ERRORLEVEL%"
+
+rmdir /s /q "!TEMP_DIR!" 2>nul
+exit /b !TEST_EXIT!
+
 :resolve_runfile
 setlocal EnableExtensions EnableDelayedExpansion
 set "logical_path=%~2"
diff --git a/workspace_root_test_launcher.sh.tpl b/workspace_root_test_launcher.sh.tpl
index 1409e5672..1ba752506 100644
--- a/workspace_root_test_launcher.sh.tpl
+++ b/workspace_root_test_launcher.sh.tpl
@@ -48,6 +48,72 @@ workspace_root_marker="$(resolve_runfile "__WORKSPACE_ROOT_MARKER__")"
 workspace_root="$(dirname "$(dirname "$(dirname "${workspace_root_marker}")")")"
 test_bin="$(resolve_runfile "__TEST_BIN__")"
 
+test_shard_index() {
+  local test_name="$1"
+  # FNV-1a 32-bit hash. Keep this stable so adding one test does not reshuffle
+  # unrelated tests between shards.
+  local hash=2166136261
+  local byte
+  local char
+  local i
+  local LC_ALL=C
+
+  for ((i = 0; i < ${#test_name}; i++)); do
+    char="${test_name:i:1}"
+    printf -v byte "%d" "'$char"
+    hash=$(( ((hash ^ byte) * 16777619) & 0xffffffff ))
+  done
+
+  echo $(( hash % TOTAL_SHARDS ))
+}
+
+run_sharded_libtest() {
+  if [[ -n "${TEST_SHARD_STATUS_FILE:-}" && "${TEST_TOTAL_SHARDS:-0}" != "0" ]]; then
+    touch "${TEST_SHARD_STATUS_FILE}"
+  fi
+
+  # Extra libtest args are usually ad-hoc local filters. Preserve those exactly
+  # rather than combining them with generated exact filters.
+  if [[ $# -gt 0 ]]; then
+    exec "${test_bin}" "$@"
+  fi
+
+  if [[ -z "${SHARD_INDEX}" ]]; then
+    echo "TEST_SHARD_INDEX or RULES_RUST_TEST_SHARD_INDEX must be set when sharding is enabled" >&2
+    exit 1
+  fi
+
+  local list_output
+  local test_list
+  list_output="$("${test_bin}" --list --format terse)"
+  test_list="$(printf '%s\n' "${list_output}" | grep ': test$' | sed 's/: test$//' | LC_ALL=C sort || true)"
+
+  if [[ -z "${test_list}" ]]; then
+    exit 0
+  fi
+
+  local shard_tests=()
+  local test_name
+  while IFS= read -r test_name; do
+    if (( $(test_shard_index "${test_name}") == SHARD_INDEX )); then
+      shard_tests+=("${test_name}")
+    fi
+  done <<< "${test_list}"
+
+  if [[ ${#shard_tests[@]} -eq 0 ]]; then
+    exit 0
+  fi
+
+  exec "${test_bin}" "${shard_tests[@]}" --exact
+}
+
 export INSTA_WORKSPACE_ROOT="${workspace_root}"
 cd "${workspace_root}"
+
+TOTAL_SHARDS="${RULES_RUST_TEST_TOTAL_SHARDS:-${TEST_TOTAL_SHARDS:-}}"
+SHARD_INDEX="${RULES_RUST_TEST_SHARD_INDEX:-${TEST_SHARD_INDEX:-}}"
+if [[ -n "${TOTAL_SHARDS}" && "${TOTAL_SHARDS}" != "0" ]]; then
+  run_sharded_libtest "$@"
+fi
+
 exec "${test_bin}" "$@"