From 536952eeeeb5ee1a67c67224ef7ab24c221d669b Mon Sep 17 00:00:00 2001
From: Michael Bolin <mbolin@openai.com>
Date: Tue, 21 Apr 2026 18:35:47 -0700
Subject: [PATCH] bazel: run wrapped Rust unit test shards (#18913)

## Why

The `codex-tui` Cargo test suite was catching stale snapshot
expectations, but the matching Bazel unit-test target was still green.
The TUI unit target is wrapped by `workspace_root_test` so tests run
from the repository root and Insta can resolve Cargo-like snapshot
paths. After native Bazel sharding was enabled for that wrapped target,
rules_rust also inserted its own sharding wrapper around the Rust test
binary.

Those two wrappers did not compose: rules_rust's sharding wrapper
expects to run from its own runfiles cwd, while `workspace_root_test`
deliberately changes cwd to the repo root before invoking the test. In
that configuration, the inner wrapper could fail to enumerate the Rust
tests and exit successfully with empty shards, so snapshot regressions
were not being exercised by Bazel.

## What Changed

- Stop enabling rules_rust's inner `experimental_enable_sharding` for
unit-test binaries created by `codex_rust_crate`.
- Keep the configured `shard_count` on the outer `workspace_root_test`
target.
- Add libtest sharding directly to `workspace_root_test_launcher.sh.tpl`
and `workspace_root_test_launcher.bat.tpl` after the launcher has
resolved the actual test binary and established the intended
repository-root cwd.
- Partition tests by a stable FNV-1a hash of each libtest test name,
matching the stable-shard behavior we wanted without depending on the
inner rules_rust wrapper.
- Preserve ad-hoc local test filters by running the resolved test binary
directly when explicit test args are supplied.
- On Windows, run selected libtest names from the shard list in bounded
PowerShell batches instead of concatenating every selected test into one
`cmd.exe` command line.

This PR is stacked on top of #18912, which contains only the snapshot
expectation updates exposed once the Bazel target actually runs the TUI
unit tests. It is also the reason #18916 becomes visible: once this
wrapper fix makes Bazel execute the affected `codex-core` test, that
test needs its own executable-path setup fixed.

## Verification

- `cargo test -p codex-tui`
- `bazel test //codex-rs/tui:tui-unit-tests --test_output=errors`
- `bazel test //codex-rs/tui:all --test_output=errors`
- `bash -n workspace_root_test_launcher.sh.tpl`
- Exercised the Windows PowerShell batching fragment locally with a fake
test binary and shard-list file.
---
 defs.bzl                             |  8 ++--
 workspace_root_test_launcher.bat.tpl | 67 ++++++++++++++++++++++++++++
 workspace_root_test_launcher.sh.tpl  | 66 +++++++++++++++++++++++++++
 3 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/defs.bzl b/defs.bzl
index 53114a577..d1e38ea91 100644
--- a/defs.bzl
+++ b/defs.bzl
@@ -255,10 +255,9 @@ def codex_rust_crate(
         unit_test_name = name + "-unit-tests"
         unit_test_binary = name + "-unit-tests-bin"
         unit_test_shard_count = _test_shard_count(test_shard_counts, unit_test_name)
-        unit_test_binary_kwargs = {}
-        if unit_test_shard_count:
-            unit_test_binary_kwargs["experimental_enable_sharding"] = True
-
+        # Shard at the workspace_root_test layer. rules_rust's sharding wrapper
+        # expects to run from its own runfiles cwd, while workspace_root_test
+        # deliberately changes cwd so Insta sees Cargo-like snapshot paths.
         rust_test(
             name = unit_test_binary,
             crate = name,
@@ -277,7 +276,6 @@ def codex_rust_crate(
             rustc_env = rustc_env,
             data = test_data_extra,
             tags = test_tags + ["manual"],
-            **unit_test_binary_kwargs
         )
 
         unit_test_kwargs = {}
diff --git a/workspace_root_test_launcher.bat.tpl b/workspace_root_test_launcher.bat.tpl
index 6b7222cca..af82e5ecf 100644
--- a/workspace_root_test_launcher.bat.tpl
+++ b/workspace_root_test_launcher.bat.tpl
@@ -12,9 +12,76 @@ if errorlevel 1 exit /b 1
 
 set "INSTA_WORKSPACE_ROOT=%workspace_root%"
 cd /d "%workspace_root%" || exit /b 1
+
+set "TOTAL_SHARDS=%RULES_RUST_TEST_TOTAL_SHARDS%"
+if not defined TOTAL_SHARDS set "TOTAL_SHARDS=%TEST_TOTAL_SHARDS%"
+if defined TOTAL_SHARDS if not "%TOTAL_SHARDS%"=="0" (
+  call :run_sharded_libtest %*
+  exit /b !ERRORLEVEL!
+)
+
 "%test_bin%" %*
 exit /b %ERRORLEVEL%
 
+:run_sharded_libtest
+if defined TEST_SHARD_STATUS_FILE if defined TEST_TOTAL_SHARDS if not "%TEST_TOTAL_SHARDS%"=="0" (
+  type nul > "%TEST_SHARD_STATUS_FILE%"
+)
+
+if not "%~1"=="" (
+  "%test_bin%" %*
+  exit /b !ERRORLEVEL!
+)
+
+set "SHARD_INDEX=%RULES_RUST_TEST_SHARD_INDEX%"
+if not defined SHARD_INDEX set "SHARD_INDEX=%TEST_SHARD_INDEX%"
+if not defined SHARD_INDEX (
+  >&2 echo TEST_SHARD_INDEX or RULES_RUST_TEST_SHARD_INDEX must be set when sharding is enabled
+  exit /b 1
+)
+
+set "TEMP_ROOT=%TEST_TMPDIR%"
+if not defined TEMP_ROOT set "TEMP_ROOT=%TEMP%"
+if not defined TEMP_ROOT set "TEMP_ROOT=."
+:CREATE_TEMP_DIR
+set "TEMP_DIR=%TEMP_ROOT%\workspace_root_test_sharding_!RANDOM!_!RANDOM!_!RANDOM!"
+mkdir "!TEMP_DIR!" 2>nul
+if errorlevel 1 goto :CREATE_TEMP_DIR
+set "TEMP_LIST=!TEMP_DIR!\list.txt"
+set "TEMP_SHARD_LIST=!TEMP_DIR!\shard.txt"
+
+"%test_bin%" --list --format terse > "!TEMP_LIST!"
+if errorlevel 1 (
+  rmdir /s /q "!TEMP_DIR!" 2>nul
+  exit /b 1
+)
+
+powershell.exe -NoProfile -ExecutionPolicy Bypass -Command ^
+  "$ErrorActionPreference = 'Stop';" ^
+  "$tests = @(Get-Content -LiteralPath $env:TEMP_LIST | Where-Object { $_.EndsWith(': test') } | ForEach-Object { $_.Substring(0, $_.Length - 6) });" ^
+  "[Array]::Sort($tests, [StringComparer]::Ordinal);" ^
+  "$totalShards = [uint32]$env:TOTAL_SHARDS; $shardIndex = [uint32]$env:SHARD_INDEX;" ^
+  "$fnvPrime = [uint64]16777619; $u32Mask = [uint64]4294967295;" ^
+  "foreach ($test in $tests) { $hash = [uint32]2166136261; foreach ($byte in [Text.Encoding]::UTF8.GetBytes($test)) { $hash = [uint32](([uint64]($hash -bxor $byte) * $fnvPrime) -band $u32Mask) }; if (($hash %% $totalShards) -eq $shardIndex) { $test } }" ^
+  > "!TEMP_SHARD_LIST!"
+if errorlevel 1 (
+  rmdir /s /q "!TEMP_DIR!" 2>nul
+  exit /b 1
+)
+
+powershell.exe -NoProfile -ExecutionPolicy Bypass -Command ^
+  "$ErrorActionPreference = 'Stop';" ^
+  "$testBin = $env:test_bin;" ^
+  "$tests = @(Get-Content -LiteralPath $env:TEMP_SHARD_LIST);" ^
+  "$failed = $false; $limit = 7000; $batch = @(); $batchChars = $testBin.Length + 8;" ^
+  "function Invoke-TestBatch { if ($script:batch.Count -eq 0) { return }; & $script:testBin @script:batch '--exact'; if ($LASTEXITCODE -ne 0) { $script:failed = $true }; $script:batch = @(); $script:batchChars = $script:testBin.Length + 8 }" ^
+  "foreach ($test in $tests) { $argChars = $test.Length + 3; if (($batch.Count -gt 0) -and ($batchChars + $argChars -gt $limit)) { Invoke-TestBatch }; $batch += $test; $batchChars += $argChars }" ^
+  "Invoke-TestBatch; if ($failed) { exit 1 }"
+set "TEST_EXIT=%ERRORLEVEL%"
+
+rmdir /s /q "!TEMP_DIR!" 2>nul
+exit /b !TEST_EXIT!
+
 :resolve_runfile
 setlocal EnableExtensions EnableDelayedExpansion
 set "logical_path=%~2"
diff --git a/workspace_root_test_launcher.sh.tpl b/workspace_root_test_launcher.sh.tpl
index 1409e5672..1ba752506 100644
--- a/workspace_root_test_launcher.sh.tpl
+++ b/workspace_root_test_launcher.sh.tpl
@@ -48,6 +48,72 @@ workspace_root_marker="$(resolve_runfile "__WORKSPACE_ROOT_MARKER__")"
 workspace_root="$(dirname "$(dirname "$(dirname "${workspace_root_marker}")")")"
 test_bin="$(resolve_runfile "__TEST_BIN__")"
 
+test_shard_index() {
+  local test_name="$1"
+  # FNV-1a 32-bit hash. Keep this stable so adding one test does not reshuffle
+  # unrelated tests between shards.
+  local hash=2166136261
+  local byte
+  local char
+  local i
+  local LC_ALL=C
+
+  for ((i = 0; i < ${#test_name}; i++)); do
+    char="${test_name:i:1}"
+    printf -v byte "%d" "'$char"
+    hash=$(( ((hash ^ byte) * 16777619) & 0xffffffff ))
+  done
+
+  echo $(( hash % TOTAL_SHARDS ))
+}
+
+run_sharded_libtest() {
+  if [[ -n "${TEST_SHARD_STATUS_FILE:-}" && "${TEST_TOTAL_SHARDS:-0}" != "0" ]]; then
+    touch "${TEST_SHARD_STATUS_FILE}"
+  fi
+
+  # Extra libtest args are usually ad-hoc local filters. Preserve those exactly
+  # rather than combining them with generated exact filters.
+  if [[ $# -gt 0 ]]; then
+    exec "${test_bin}" "$@"
+  fi
+
+  if [[ -z "${SHARD_INDEX}" ]]; then
+    echo "TEST_SHARD_INDEX or RULES_RUST_TEST_SHARD_INDEX must be set when sharding is enabled" >&2
+    exit 1
+  fi
+
+  local list_output
+  local test_list
+  list_output="$("${test_bin}" --list --format terse)"
+  test_list="$(printf '%s\n' "${list_output}" | grep ': test$' | sed 's/: test$//' | LC_ALL=C sort || true)"
+
+  if [[ -z "${test_list}" ]]; then
+    exit 0
+  fi
+
+  local shard_tests=()
+  local test_name
+  while IFS= read -r test_name; do
+    if (( $(test_shard_index "${test_name}") == SHARD_INDEX )); then
+      shard_tests+=("${test_name}")
+    fi
+  done <<< "${test_list}"
+
+  if [[ ${#shard_tests[@]} -eq 0 ]]; then
+    exit 0
+  fi
+
+  exec "${test_bin}" "${shard_tests[@]}" --exact
+}
+
 export INSTA_WORKSPACE_ROOT="${workspace_root}"
 cd "${workspace_root}"
+
+TOTAL_SHARDS="${RULES_RUST_TEST_TOTAL_SHARDS:-${TEST_TOTAL_SHARDS:-}}"
+SHARD_INDEX="${RULES_RUST_TEST_SHARD_INDEX:-${TEST_SHARD_INDEX:-}}"
+if [[ -n "${TOTAL_SHARDS}" && "${TOTAL_SHARDS}" != "0" ]]; then
+  run_sharded_libtest "$@"
+fi
+
 exec "${test_bin}" "$@"