From ff31ba8d0a75f0a51644359265e194b93848a023 Mon Sep 17 00:00:00 2001
From: "Adam Perry @ OpenAI" <anp@openai.com>
Date: Mon, 22 Jun 2026 17:55:49 -0700
Subject: [PATCH] Make formatter output quiet on success (#29467)

## Why

`just fmt` is quite noisy even on successful runs.

## What

Only print output when a formatter fails.

- Buffer output from each formatter and print only a failed command and
its diagnostics.
- Prefix the `justfile` driver invocations with `@` so Just does not
echo the command itself.
- Retain rustfmt stderr on failure and cover silent-success and
failure-reporting behavior.

## Validation

- Confirmed `just fmt` and `just fmt-check` both exit successfully with
empty stdout and stderr.
---
 justfile                                      |  4 +-
 scripts/format.py                             | 38 +++-------
 .../test_artifact_workflow_and_binaries.py    | 73 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 30 deletions(-)

diff --git a/justfile b/justfile
index 560fb1c22..03f87391a 100644
--- a/justfile
+++ b/justfile
@@ -38,11 +38,11 @@ app-server-test-client *args:
 
 # Format the justfile, Rust, Bazel/Starlark, Python SDK code, and Python scripts.
 fmt:
-    {{ python }} ../scripts/format.py
+    @{{ python }} ../scripts/format.py
 
 # Check formatting without modifying files.
 fmt-check:
-    {{ python }} ../scripts/format.py --check
+    @{{ python }} ../scripts/format.py --check
 
 fix *args:
     cargo clippy --fix --tests --allow-dirty {args}
diff --git a/scripts/format.py b/scripts/format.py
index 6fa5aab69..9310254f5 100644
--- a/scripts/format.py
+++ b/scripts/format.py
@@ -18,7 +18,6 @@ REPO_ROOT = Path(__file__).resolve().parents[1]
 class Command:
     args: tuple[str, ...]
     cwd: Path = REPO_ROOT
-    discard_stderr: bool = False
 
 
 @dataclass(frozen=True)
@@ -45,13 +44,7 @@ def rust_formatter_group(*, check: bool) -> FormatterGroup:
     args = ["cargo", "fmt", "--", "--config", "imports_granularity=Item"]
     if check:
         args.append("--check")
-    # Stable rustfmt repeats a nightly-only `imports_granularity` warning
-    # for each crate, so suppress that expected stderr noise.
-    command = Command(
-        tuple(args),
-        REPO_ROOT / "codex-rs",
-        discard_stderr=True,
-    )
+    command = Command(tuple(args), REPO_ROOT / "codex-rs")
     return FormatterGroup("Rust", (command,))
 
 
@@ -152,31 +145,27 @@ def formatter_groups(*, check: bool) -> tuple[FormatterGroup, ...]:
 
 def run_formatter_group(group: FormatterGroup) -> FormatterResult:
     """Run one formatter group sequentially and return its buffered output."""
-    output: list[str] = []
     for command in group.commands:
-        output.append(f"$ {shlex.join(command.args)}\n")
         try:
             process = subprocess.run(
                 command.args,
                 cwd=command.cwd,
                 stdout=subprocess.PIPE,
-                stderr=subprocess.DEVNULL
-                if command.discard_stderr
-                else subprocess.STDOUT,
+                stderr=subprocess.STDOUT,
                 text=True,
                 check=False,
             )
         except OSError as error:
-            output.append(f"{error}\n")
-            return FormatterResult(group.name, "".join(output), 1)
+            output = f"$ {shlex.join(command.args)}\n{error}\n"
+            return FormatterResult(group.name, output, 1)
 
-        output.append(process.stdout)
-        if process.stdout and not process.stdout.endswith("\n"):
-            output.append("\n")
         if process.returncode != 0:
-            return FormatterResult(group.name, "".join(output), process.returncode)
+            output = f"$ {shlex.join(command.args)}\n{process.stdout}"
+            if process.stdout and not process.stdout.endswith("\n"):
+                output += "\n"
+            return FormatterResult(group.name, output, process.returncode)
 
-    return FormatterResult(group.name, "".join(output), 0)
+    return FormatterResult(group.name, "", 0)
 
 
 def main() -> int:
@@ -191,16 +180,13 @@ def main() -> int:
 
     failures: list[str] = []
     with ThreadPoolExecutor(max_workers=len(groups)) as executor:
-        futures = {}
-        for group in groups:
-            print(f"Starting {group.name} formatter...", flush=True)
-            futures[executor.submit(run_formatter_group, group)] = group.name
+        futures = [executor.submit(run_formatter_group, group) for group in groups]
         for future in as_completed(futures):
             result = future.result()
-            print(f"==> {result.name} formatter finished")
-            print(result.output, end="")
             if result.returncode != 0:
                 failures.append(result.name)
+                print(f"==> {result.name} formatter failed", file=sys.stderr)
+                print(result.output, end="", file=sys.stderr)
 
     if failures:
         print(f"Formatting failed: {', '.join(failures)}", file=sys.stderr)
diff --git a/sdk/python/tests/test_artifact_workflow_and_binaries.py b/sdk/python/tests/test_artifact_workflow_and_binaries.py
index b2cfda53b..301a75aba 100644
--- a/sdk/python/tests/test_artifact_workflow_and_binaries.py
+++ b/sdk/python/tests/test_artifact_workflow_and_binaries.py
@@ -112,9 +112,9 @@ def test_root_fmt_recipes_use_shared_formatter_driver() -> None:
         "fmt_comment": (
             "# Format the justfile, Rust, Bazel/Starlark, Python SDK code, and Python scripts."
         ),
-        "fmt_commands": ["{{ python }} ../scripts/format.py"],
+        "fmt_commands": ["@{{ python }} ../scripts/format.py"],
         "fmt_check_comment": "# Check formatting without modifying files.",
-        "fmt_check_commands": ["{{ python }} ../scripts/format.py --check"],
+        "fmt_check_commands": ["@{{ python }} ../scripts/format.py --check"],
     }
 
     assert actual == expected, (
@@ -245,6 +245,75 @@ def test_root_format_driver_covers_all_formatter_groups(
     ]
 
 
+def test_root_format_driver_discards_successful_command_output(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    script = _load_root_format_script_module()
+    processes = iter(
+        (
+            script.subprocess.CompletedProcess(("first",), 0, "routine output\n"),
+            script.subprocess.CompletedProcess(("second",), 2, "failure output\n"),
+        )
+    )
+    monkeypatch.setattr(script.subprocess, "run", lambda *args, **kwargs: next(processes))
+    group = script.FormatterGroup(
+        "Test",
+        (script.Command(("first",)), script.Command(("second",))),
+    )
+
+    assert script.run_formatter_group(group) == script.FormatterResult(
+        "Test",
+        "$ second\nfailure output\n",
+        2,
+    )
+
+
+def test_root_format_driver_is_silent_when_all_formatters_succeed(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    script = _load_root_format_script_module()
+    groups = (script.FormatterGroup("Quiet", ()),)
+    monkeypatch.setattr(script, "formatter_groups", lambda *, check: groups)
+    monkeypatch.setattr(
+        script,
+        "run_formatter_group",
+        lambda group: script.FormatterResult(group.name, "hidden output\n", 0),
+    )
+    monkeypatch.setattr(sys, "argv", ["format.py"])
+
+    assert script.main() == 0
+    captured = capsys.readouterr()
+    assert (captured.out, captured.err) == ("", "")
+
+
+def test_root_format_driver_reports_only_failed_formatters(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    script = _load_root_format_script_module()
+    groups = (
+        script.FormatterGroup("Quiet", ()),
+        script.FormatterGroup("Broken", ()),
+    )
+    monkeypatch.setattr(script, "formatter_groups", lambda *, check: groups)
+
+    def fake_run(group):
+        if group.name == "Broken":
+            return script.FormatterResult(group.name, "$ broken\nfailure output\n", 2)
+        return script.FormatterResult(group.name, "hidden output\n", 0)
+
+    monkeypatch.setattr(script, "run_formatter_group", fake_run)
+    monkeypatch.setattr(sys, "argv", ["format.py"])
+
+    assert script.main() == 1
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert captured.err == (
+        "==> Broken formatter failed\n$ broken\nfailure output\nFormatting failed: Broken\n"
+    )
+
+
 def test_generate_types_wires_all_generation_steps() -> None:
     """The type generation command should refresh every schema-derived artifact."""
     source = (ROOT / "scripts" / "update_sdk_artifacts.py").read_text()