[codex] Remove legacy shell output formatting paths (#22706)

## Why

The client and tool pipeline still carried compatibility code for legacy
structured shell output. Current shell and apply_patch responses are
already plain text for model consumption, so keeping a
JSON-serialization path plus shell-item rewrite logic makes the request
formatter and tests preserve a format we do not need anymore.

## What Changed

- Removed the client-side shell output rewrite from
`core/src/client_common.rs`.
- Removed the structured exec-output formatter and the shell `freeform`
switch so tool emitters use one model-facing formatter.
- Collapsed apply_patch/shell serialization tests around the remaining
plain-text output expectations and removed duplicate one-variant
parameterized cases.
- Kept the `ApplyPatchModelOutput::ShellCommandViaHeredoc` compatibility
input shape, but no longer treats it as a separate output-format mode.

## Validation

- `cargo test -p codex-core client_common`
- `cargo test -p codex-core shell_serialization`
- `cargo test -p codex-core apply_patch_cli`
- `just fix -p codex-core`

## Documentation

No external Codex documentation update is needed.
This commit is contained in:
pakrym-oai
2026-05-18 09:57:54 -07:00
committed by GitHub
Unverified
parent adca1b643f
commit 82061660ae
11 changed files with 186 additions and 707 deletions
+1 -110
View File
@@ -2,13 +2,10 @@ pub use codex_api::ResponseEvent;
use codex_config::types::Personality;
use codex_protocol::error::Result;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::ResponseItem;
use codex_tools::ToolSpec;
use futures::Stream;
use serde::Deserialize;
use serde_json::Value;
use std::collections::HashSet;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
@@ -64,116 +61,10 @@ impl Default for Prompt {
impl Prompt {
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
let mut input = self.input.clone();
// when using the *Freeform* apply_patch tool specifically, tool outputs
// should be structured text, not json. Do NOT reserialize when using
// the Function tool - note that this differs from the check above for
// instructions. We declare the result as a named variable for clarity.
let is_freeform_apply_patch_tool_present = self.tools.iter().any(|tool| match tool {
ToolSpec::Freeform(f) => f.name == "apply_patch",
_ => false,
});
if is_freeform_apply_patch_tool_present {
reserialize_shell_outputs(&mut input);
}
input
self.input.clone()
}
}
fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
let mut shell_call_ids: HashSet<String> = HashSet::new();
items.iter_mut().for_each(|item| match item {
ResponseItem::LocalShellCall { call_id, id, .. } => {
if let Some(identifier) = call_id.clone().or_else(|| id.clone()) {
shell_call_ids.insert(identifier);
}
}
ResponseItem::CustomToolCall {
id: _,
status: _,
call_id,
name,
input: _,
} => {
if name == "apply_patch" {
shell_call_ids.insert(call_id.clone());
}
}
ResponseItem::FunctionCall { name, call_id, .. }
if is_shell_tool_name(name) || name == "apply_patch" =>
{
shell_call_ids.insert(call_id.clone());
}
ResponseItem::FunctionCallOutput {
call_id, output, ..
}
| ResponseItem::CustomToolCallOutput {
call_id, output, ..
} => {
if shell_call_ids.remove(call_id)
&& let Some(structured) = output
.text_content()
.and_then(parse_structured_shell_output)
{
output.body = FunctionCallOutputBody::Text(structured);
}
}
_ => {}
})
}
fn is_shell_tool_name(name: &str) -> bool {
name == "shell"
}
#[derive(Deserialize)]
struct ExecOutputJson {
output: String,
metadata: ExecOutputMetadataJson,
}
#[derive(Deserialize)]
struct ExecOutputMetadataJson {
exit_code: i32,
duration_seconds: f32,
}
fn parse_structured_shell_output(raw: &str) -> Option<String> {
let parsed: ExecOutputJson = serde_json::from_str(raw).ok()?;
Some(build_structured_output(&parsed))
}
fn build_structured_output(parsed: &ExecOutputJson) -> String {
let mut sections = Vec::new();
sections.push(format!("Exit code: {}", parsed.metadata.exit_code));
sections.push(format!(
"Wall time: {} seconds",
parsed.metadata.duration_seconds
));
let mut output = parsed.output.clone();
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {
sections.push(format!("Total output lines: {total_lines}"));
output = stripped.to_string();
}
sections.push("Output:".to_string());
sections.push(output);
sections.join("\n")
}
fn strip_total_output_header(output: &str) -> Option<(&str, u32)> {
let after_prefix = output.strip_prefix("Total output lines: ")?;
let (total_segment, remainder) = after_prefix.split_once('\n')?;
let total_lines = total_segment.parse::<u32>().ok()?;
let remainder = remainder.strip_prefix('\n').unwrap_or(remainder);
Some((remainder, total_lines))
}
pub struct ResponseStream {
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
/// Signals the mapper task that the consumer stopped polling before the
-63
View File
@@ -3,7 +3,6 @@ use codex_api::ResponsesApiRequest;
use codex_api::TextControls;
use codex_api::create_text_param_for_request;
use codex_protocol::config_types::ServiceTier;
use codex_protocol::models::FunctionCallOutputPayload;
use pretty_assertions::assert_eq;
use super::*;
@@ -166,65 +165,3 @@ fn serializes_flex_service_tier_when_set() {
Some("flex")
);
}
#[test]
fn reserializes_shell_outputs_for_function_and_custom_tool_calls() {
let raw_output = r#"{"output":"hello","metadata":{"exit_code":0,"duration_seconds":0.5}}"#;
let expected_output = "Exit code: 0\nWall time: 0.5 seconds\nOutput:\nhello";
let mut items = vec![
ResponseItem::FunctionCall {
id: None,
name: "shell".to_string(),
namespace: None,
arguments: "{}".to_string(),
call_id: "call-1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
},
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "call-2".to_string(),
name: "apply_patch".to_string(),
input: "*** Begin Patch".to_string(),
},
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
name: None,
output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
},
];
reserialize_shell_outputs(&mut items);
assert_eq!(
items,
vec![
ResponseItem::FunctionCall {
id: None,
name: "shell".to_string(),
namespace: None,
arguments: "{}".to_string(),
call_id: "call-1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
},
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "call-2".to_string(),
name: "apply_patch".to_string(),
input: "*** Begin Patch".to_string(),
},
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
name: None,
output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
},
]
);
}
+2 -14
View File
@@ -116,7 +116,6 @@ pub(crate) enum ToolEmitter {
cwd: AbsolutePathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
freeform: bool,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
@@ -132,19 +131,13 @@ pub(crate) enum ToolEmitter {
}
impl ToolEmitter {
pub fn shell(
command: Vec<String>,
cwd: AbsolutePathBuf,
source: ExecCommandSource,
freeform: bool,
) -> Self {
pub fn shell(command: Vec<String>, cwd: AbsolutePathBuf, source: ExecCommandSource) -> Self {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
cwd,
source,
parsed_cmd,
freeform,
}
}
@@ -328,12 +321,7 @@ impl ToolEmitter {
output: &ExecToolCallOutput,
ctx: ToolEventCtx<'_>,
) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
}
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
}
super::format_exec_output_for_model(output, ctx.turn.truncation_policy)
}
pub async fn finish(
+1 -8
View File
@@ -52,7 +52,6 @@ struct RunExecLikeArgs {
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
shell_runtime_backend: ShellRuntimeBackend,
}
@@ -68,7 +67,6 @@ async fn run_exec_like(args: RunExecLikeArgs) -> Result<FunctionToolOutput, Func
turn,
tracker,
call_id,
freeform,
shell_runtime_backend,
} = args;
@@ -162,12 +160,7 @@ async fn run_exec_like(args: RunExecLikeArgs) -> Result<FunctionToolOutput, Func
}
let source = ExecCommandSource::Agent;
let emitter = ToolEmitter::shell(
exec_params.command.clone(),
exec_params.cwd.clone(),
source,
freeform,
);
let emitter = ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
let event_ctx = ToolEventCtx::new(
session.as_ref(),
turn.as_ref(),
@@ -197,7 +197,6 @@ impl ToolExecutor<ToolInvocation> for ShellCommandHandler {
turn,
tracker,
call_id,
freeform: true,
shell_runtime_backend: self.shell_runtime_backend(),
})
.await
+1 -41
View File
@@ -23,7 +23,6 @@ use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::formatted_truncate_text;
use codex_utils_output_truncation::truncate_text;
pub use router::ToolRouter;
use serde::Serialize;
// Telemetry preview limits: keep log events smaller than model budgets.
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
@@ -60,46 +59,7 @@ pub(crate) fn tool_user_shell_type(
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model_structured(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
let ExecToolCallOutput {
exit_code,
duration,
..
} = exec_output;
#[derive(Serialize)]
struct ExecMetadata {
exit_code: i32,
duration_seconds: f32,
}
#[derive(Serialize)]
struct ExecOutput<'a> {
output: &'a str,
metadata: ExecMetadata,
}
// round to 1 decimal place
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
let formatted_output = format_exec_output_str(exec_output, truncation_policy);
let payload = ExecOutput {
output: &formatted_output,
metadata: ExecMetadata {
exit_code: *exit_code,
duration_seconds,
},
};
#[expect(clippy::expect_used)]
serde_json::to_string(&payload).expect("serialize ExecOutput")
}
pub fn format_exec_output_for_model_freeform(
pub fn format_exec_output_for_model(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
-15
View File
@@ -35,8 +35,6 @@ use wiremock::http::HeaderValue;
use wiremock::matchers::method;
use wiremock::matchers::path_regex;
use crate::test_codex::ApplyPatchModelOutput;
#[derive(Debug, Clone)]
pub struct ResponseMock {
requests: Arc<Mutex<Vec<ResponsesRequest>>>,
@@ -883,19 +881,6 @@ pub fn ev_local_shell_call(call_id: &str, status: &str, command: Vec<&str>) -> V
})
}
pub fn ev_apply_patch_call(
call_id: &str,
patch: &str,
output_type: ApplyPatchModelOutput,
) -> Value {
match output_type {
ApplyPatchModelOutput::Freeform => ev_apply_patch_custom_tool_call(call_id, patch),
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
ev_apply_patch_shell_command_call_via_heredoc(call_id, patch)
}
}
}
/// Convenience: SSE event for an `apply_patch` custom tool call with raw patch
/// text. This mirrors the payload produced by the Responses API when the model
/// invokes `apply_patch` directly.
+3 -24
View File
@@ -184,20 +184,12 @@ fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<Stri
String::from_utf8(output.stdout).context("docker stdout must be utf-8")
}
/// A collection of different ways the model can output an apply_patch call
/// Non-default apply_patch model output shapes used by compatibility tests.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ApplyPatchModelOutput {
Freeform,
ShellCommandViaHeredoc,
}
/// A collection of different ways the model can output an apply_patch call
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ShellModelOutput {
ShellCommand,
// UnifiedExec has its own set of tests
}
/// Returns the permission fields required by `Op::UserTurn` for tests that
/// construct the op directly.
pub fn turn_permission_fields(
@@ -977,21 +969,8 @@ impl TestCodexHarness {
custom_tool_call_output_text(&bodies, call_id)
}
pub async fn apply_patch_output(
&self,
call_id: &str,
output_type: ApplyPatchModelOutput,
) -> String {
// Box the awaited output helpers so callers do not inline request
// capture and response parsing into their own async state.
match output_type {
ApplyPatchModelOutput::Freeform => {
Box::pin(self.custom_tool_call_output(call_id)).await
}
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
Box::pin(self.function_call_stdout(call_id)).await
}
}
pub async fn apply_patch_output(&self, call_id: &str) -> String {
self.custom_tool_call_output(call_id).await
}
}
+126 -179
View File
@@ -3,8 +3,8 @@
use anyhow::Result;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use core_test_support::responses::ev_apply_patch_call;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_shell_command_call_via_heredoc;
use core_test_support::responses::ev_shell_command_call;
use core_test_support::test_codex::ApplyPatchModelOutput;
use pretty_assertions::assert_eq;
@@ -45,7 +45,6 @@ use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_with_timeout;
use serde_json::json;
use test_case::test_case;
use wiremock::Mock;
use wiremock::Respond;
use wiremock::ResponseTemplate;
@@ -182,11 +181,35 @@ pub async fn mount_apply_patch(
call_id: &str,
patch: &str,
assistant_msg: &str,
output_type: ApplyPatchModelOutput,
) {
mount_sse_sequence(
harness.server(),
apply_patch_responses(call_id, patch, assistant_msg, output_type),
apply_patch_responses(
call_id,
patch,
assistant_msg,
ev_apply_patch_custom_tool_call,
),
)
.await;
}
async fn mount_apply_patch_model_output(
harness: &TestCodexHarness,
call_id: &str,
patch: &str,
assistant_msg: &str,
model_output: ApplyPatchModelOutput,
) {
let apply_patch_call = match model_output {
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
ev_apply_patch_shell_command_call_via_heredoc
}
};
mount_sse_sequence(
harness.server(),
apply_patch_responses(call_id, patch, assistant_msg, apply_patch_call),
)
.await;
}
@@ -195,12 +218,12 @@ fn apply_patch_responses(
call_id: &str,
patch: &str,
assistant_msg: &str,
output_type: ApplyPatchModelOutput,
apply_patch_call: fn(&str, &str) -> serde_json::Value,
) -> Vec<String> {
vec![
sse(vec![
ev_response_created("resp-1"),
ev_apply_patch_call(call_id, patch, output_type),
apply_patch_call(call_id, patch),
ev_completed("resp-1"),
]),
sse(vec![
@@ -231,20 +254,11 @@ async fn apply_patch_cli_uses_codex_self_exe_with_linux_sandbox_helper_alias() -
let patch = "*** Begin Patch\n*** Add File: helper-alias.txt\n+hello\n*** End Patch";
let call_id = "apply-helper-alias";
mount_apply_patch(
&harness,
call_id,
patch,
"done",
ApplyPatchModelOutput::Freeform,
)
.await;
mount_apply_patch(&harness, call_id, patch, "done").await;
harness.submit("please apply helper alias patch").await?;
let out = harness
.apply_patch_output(call_id, ApplyPatchModelOutput::Freeform)
.await;
let out = harness.apply_patch_output(call_id).await;
assert_regex_match(
r"(?s)^Exit code: 0.*Success\. Updated the following files:\nA helper-alias\.txt\n?$",
&out,
@@ -255,10 +269,7 @@ async fn apply_patch_cli_uses_codex_self_exe_with_linux_sandbox_helper_alias() -
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_cli_multiple_operations_integration(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_multiple_operations_integration() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -270,11 +281,11 @@ async fn apply_patch_cli_multiple_operations_integration(
let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";
let call_id = "apply-multi-ops";
mount_apply_patch(&harness, call_id, patch, "done", output_type).await;
mount_apply_patch(&harness, call_id, patch, "done").await;
harness.submit("please apply multi-ops patch").await?;
let out = harness.apply_patch_output(call_id, output_type).await;
let out = harness.apply_patch_output(call_id).await;
let expected = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -297,9 +308,7 @@ D delete.txt
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_multiple_chunks() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -310,7 +319,7 @@ async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) ->
let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
let call_id = "apply-multi-chunks";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply multi-chunk patch").await?;
@@ -322,11 +331,7 @@ async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) ->
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_moves_file_to_new_directory(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_moves_file_to_new_directory() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -335,7 +340,7 @@ async fn apply_patch_cli_moves_file_to_new_directory(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
let call_id = "apply-move";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply move patch").await?;
@@ -348,11 +353,7 @@ async fn apply_patch_cli_moves_file_to_new_directory(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_file_appends_trailing_newline(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_updates_file_appends_trailing_newline() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -363,7 +364,7 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline(
let patch = "*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch";
let call_id = "apply-append-nl";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply newline patch").await?;
@@ -374,11 +375,7 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_insert_only_hunk_modifies_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_insert_only_hunk_modifies_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -389,7 +386,7 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file(
let patch = "*** Begin Patch\n*** Update File: insert_only.txt\n@@\n alpha\n+beta\n omega\n*** End Patch";
let call_id = "apply-insert-only";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("insert lines via apply_patch").await?;
@@ -401,11 +398,7 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_move_overwrites_existing_destination(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_move_overwrites_existing_destination() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -417,7 +410,7 @@ async fn apply_patch_cli_move_overwrites_existing_destination(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch";
let call_id = "apply-move-overwrite";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply move overwrite patch").await?;
@@ -430,11 +423,7 @@ async fn apply_patch_cli_move_overwrites_existing_destination(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_move_without_content_change_has_no_turn_diff() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -445,7 +434,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/name.txt\n@@\n same\n*** End Patch";
let call_id = "apply-move-no-change";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
submit_without_wait(&harness, "rename without content change").await?;
@@ -467,11 +456,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_add_overwrites_existing_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_add_overwrites_existing_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -480,7 +465,7 @@ async fn apply_patch_cli_add_overwrites_existing_file(
let patch = "*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch";
let call_id = "apply-add-overwrite";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply add overwrite patch").await?;
@@ -492,22 +477,18 @@ async fn apply_patch_cli_add_overwrites_existing_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_invalid_hunk_header(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_invalid_hunk_header() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch";
let call_id = "apply-invalid-header";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply invalid header patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -521,11 +502,7 @@ async fn apply_patch_cli_rejects_invalid_hunk_header(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_reports_missing_context(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_reports_missing_context() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -535,11 +512,11 @@ async fn apply_patch_cli_reports_missing_context(
let patch =
"*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch";
let call_id = "apply-missing-context";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply missing context patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -554,22 +531,18 @@ async fn apply_patch_cli_reports_missing_context(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_reports_missing_target_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_reports_missing_target_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Update File: missing.txt\n@@\n-nope\n+better\n*** End Patch";
let call_id = "apply-missing-file";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("attempt to update a missing file").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
"expected verification failure message"
@@ -587,22 +560,18 @@ async fn apply_patch_cli_reports_missing_target_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_delete_missing_file_reports_error(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_delete_missing_file_reports_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch";
let call_id = "apply-delete-missing";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("attempt to delete missing file").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -621,20 +590,18 @@ async fn apply_patch_cli_delete_missing_file_reports_error(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_empty_patch(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_rejects_empty_patch() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** End Patch";
let call_id = "apply-empty";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply empty patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("patch rejected: empty patch"),
"expected rejection for empty patch: {out}"
@@ -643,11 +610,7 @@ async fn apply_patch_cli_rejects_empty_patch(model_output: ApplyPatchModelOutput
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_delete_directory_reports_verification_error(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -656,22 +619,18 @@ async fn apply_patch_cli_delete_directory_reports_verification_error(
let patch = "*** Begin Patch\n*** Delete File: dir\n*** End Patch";
let call_id = "apply-delete-dir";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("delete a directory via apply_patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(out.contains("apply_patch verification failed"));
assert!(out.contains("Failed to read"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -687,7 +646,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
let patch = "*** Begin Patch\n*** Add File: ../escape.txt\n+outside\n*** End Patch";
let call_id = "apply-path-traversal";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -696,7 +655,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains(
"patch rejected: writing outside of the project; rejected by user approval settings"
@@ -712,10 +671,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn intercepted_apply_patch_verification_uses_local_sandbox(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn intercepted_apply_patch_verification_uses_local_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(Ok(()), "symlink setup needs local filesystem link creation");
@@ -735,7 +691,14 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
*** End Patch"#
);
let call_id = "apply-sandboxed-read";
mount_apply_patch(&harness, call_id, &patch, "fail", model_output).await;
mount_apply_patch_model_output(
&harness,
call_id,
&patch,
"fail",
ApplyPatchModelOutput::ShellCommandViaHeredoc,
)
.await;
harness
.submit_with_permission_profile(
@@ -744,7 +707,11 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.function_call_stdout(call_id).await;
assert!(
serde_json::from_str::<serde_json::Value>(&out).is_err(),
"expected heredoc apply_patch output to be plain text"
);
assert!(
out.contains("apply_patch verification failed"),
"expected sandboxed verification failure: {out}"
@@ -762,11 +729,7 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform ; "freeform")]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(
Ok(()),
@@ -810,7 +773,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
*** End Patch"#
);
let call_id = "apply-symlink-escape";
mount_apply_patch(&harness, call_id, &patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, &patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -819,7 +782,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert_eq!(
std::fs::read_to_string(&outside_file)?,
original_contents,
@@ -831,11 +794,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform ; "freeform")]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(
Ok(()),
@@ -871,7 +830,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
*** End Patch"#
);
let call_id = "apply-hard-link";
mount_apply_patch(&harness, call_id, &patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, &patch, "ok").await;
harness
.submit_with_permission_profile(
@@ -880,7 +839,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
if cfg!(windows) {
assert!(
out.contains("patch rejected: writing outside of the project"),
@@ -933,11 +892,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -955,7 +910,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
let patch = "*** Begin Patch\n*** Update File: stay.txt\n*** Move to: ../escape-move.txt\n@@\n-from\n+to\n*** End Patch";
let call_id = "apply-move-traversal";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -964,7 +919,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains(
"patch rejected: writing outside of the project; rejected by user approval settings"
@@ -980,11 +935,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_verification_failure_has_no_side_effects(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -993,7 +944,7 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects(
let call_id = "apply-partial-no-side-effects";
let patch = "*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch";
mount_apply_patch(&harness, call_id, patch, "failed", model_output).await;
mount_apply_patch(&harness, call_id, patch, "failed").await;
harness.submit("attempt partial apply patch").await?;
@@ -1393,14 +1344,7 @@ async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nest
let call_id = "apply-nested-cwd-repo-relative";
let patch = "*** Begin Patch\n*** Update File: ../repo.txt\n@@\n-before\n+after\n*** End Patch";
mount_apply_patch(
&harness,
call_id,
patch,
"updated repo-relative path",
ApplyPatchModelOutput::Freeform,
)
.await;
mount_apply_patch(&harness, call_id, patch, "updated repo-relative path").await;
submit_without_wait(&harness, "update file outside nested cwd but inside repo").await?;
@@ -1485,10 +1429,7 @@ async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() ->
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1497,18 +1438,36 @@ async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch(
let patch_inner =
format!("*** Begin Patch\n*** Add File: {file_name}\n+lenient\n*** End Patch\n");
let call_id = "apply-lenient";
mount_apply_patch(&harness, call_id, patch_inner.as_str(), "ok", model_output).await;
mount_apply_patch_model_output(
&harness,
call_id,
patch_inner.as_str(),
"ok",
ApplyPatchModelOutput::ShellCommandViaHeredoc,
)
.await;
harness.submit("apply lenient heredoc patch").await?;
let out = harness.function_call_stdout(call_id).await;
assert!(
serde_json::from_str::<serde_json::Value>(&out).is_err(),
"expected heredoc apply_patch output to be plain text"
);
assert!(
out.contains("Success. Updated the following files:"),
"expected successful apply_patch output: {out}"
);
assert!(
out.contains(&format!("A {file_name}")),
"expected created file in apply_patch output: {out}"
);
assert_eq!(harness.read_file_text(file_name).await?, "lenient\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_end_of_file_anchor() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1517,7 +1476,7 @@ async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput)
let patch = "*** Begin Patch\n*** Update File: tail.txt\n@@\n-last\n+end\n*** End of File\n*** End Patch";
let call_id = "apply-eof";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply EOF-anchored patch").await?;
assert_eq!(harness.read_file_text("tail.txt").await?, "alpha\nend\n");
@@ -1525,11 +1484,7 @@ async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_missing_second_chunk_context_rejected(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_missing_second_chunk_context_rejected() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1540,11 +1495,11 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected(
let patch =
"*** Begin Patch\n*** Update File: two_chunks.txt\n@@\n-b\n+B\n\n-d\n+D\n*** End Patch";
let call_id = "apply-missing-ctx-2nd";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("apply missing context second chunk").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(out.contains("apply_patch verification failed"));
assert!(
out.contains("Failed to find expected lines in"),
@@ -1559,11 +1514,7 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_emits_turn_diff_event_with_unified_diff(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_emits_turn_diff_event_with_unified_diff() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1573,7 +1524,7 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff(
let call_id = "apply-diff-event";
let file = "udiff.txt";
let patch = format!("*** Begin Patch\n*** Add File: {file}\n+hello\n*** End Patch\n");
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch.as_str(), "ok").await;
submit_without_wait(&harness, "emit diff").await?;
@@ -1789,11 +1740,7 @@ async fn apply_patch_clears_aggregated_diff_after_inexact_delta() -> Result<()>
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_change_context_disambiguates_target(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_change_context_disambiguates_target() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1805,7 +1752,7 @@ async fn apply_patch_change_context_disambiguates_target(
let patch =
"*** Begin Patch\n*** Update File: multi_ctx.txt\n@@ fn b\n-x=10\n+x=11\n*** End Patch";
let call_id = "apply-ctx";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply with change_context").await?;
+3 -16
View File
@@ -3650,22 +3650,9 @@ async fn post_tool_use_records_additional_context_for_apply_patch() -> Result<()
let tool_response = hook_inputs[0]["tool_response"]
.as_str()
.context("apply_patch tool_response should be a string")?;
let mut parsed_tool_response = serde_json::from_str::<Value>(tool_response)?;
if let Some(metadata) = parsed_tool_response
.get_mut("metadata")
.and_then(Value::as_object_mut)
{
let _ = metadata.remove("duration_seconds");
}
assert_eq!(
parsed_tool_response,
serde_json::json!({
"output": "Success. Updated the following files:\nA post_tool_use_apply_patch.txt\n",
"metadata": {
"exit_code": 0,
},
})
);
assert!(tool_response.starts_with("Exit code: 0"));
assert!(tool_response.contains("Success. Updated the following files:"));
assert!(tool_response.contains("A post_tool_use_apply_patch.txt"));
Ok(())
}
+49 -236
View File
@@ -12,16 +12,12 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::ApplyPatchModelOutput;
use core_test_support::test_codex::ShellModelOutput;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::fs;
use test_case::test_case;
use crate::suite::apply_patch_cli::apply_patch_harness;
use crate::suite::apply_patch_cli::mount_apply_patch;
@@ -38,135 +34,68 @@ const FIXTURE_JSON: &str = r#"{
}
"#;
fn shell_responses(
call_id: &str,
command: Vec<&str>,
output_type: ShellModelOutput,
) -> Result<Vec<String>> {
match output_type {
ShellModelOutput::ShellCommand => {
let command = shlex::try_join(command)?;
let parameters = json!({
"command": command,
"timeout_ms": 2_000,
});
Ok(vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(
call_id,
"shell_command",
&serde_json::to_string(&parameters)?,
),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
])
}
}
}
fn configure_shell_model(
builder: TestCodexBuilder,
output_type: ShellModelOutput,
) -> TestCodexBuilder {
match output_type {
ShellModelOutput::ShellCommand => builder.with_model("test-gpt-5-codex"),
}
fn shell_responses(call_id: &str, command: Vec<&str>) -> Result<Vec<String>> {
let command = shlex::try_join(command)?;
let parameters = json!({
"command": command,
"timeout_ms": 2_000,
});
Ok(vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(
call_id,
"shell_command",
&serde_json::to_string(&parameters)?,
),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
])
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_is_structured_with_freeform_apply_patch(
output_type: ShellModelOutput,
) -> Result<()> {
async fn shell_output_preserves_fixture_json_as_freeform() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"run the structured shell command",
PermissionProfile::Disabled,
)
.await?;
let req = mock
.last_request()
.expect("structured shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
assert!(
serde_json::from_str::<Value>(output).is_err(),
"expected structured shell output to be plain text",
);
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
freeform shell
?$";
assert_regex_match(expected_pattern, output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_structures_fixture_with_serialization(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let mut builder = test_codex().with_model("test-gpt-5-codex");
let test = builder.build(&server).await?;
let fixture_path = test.cwd.path().join("fixture.json");
fs::write(&fixture_path, FIXTURE_JSON)?;
let fixture_path_str = fixture_path.to_string_lossy().to_string();
let call_id = "shell-structured-fixture";
let call_id = "shell-freeform-fixture";
let responses = shell_responses(
call_id,
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
output_type,
)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"read the fixture JSON with structured output",
"read the fixture JSON with shell output",
PermissionProfile::Disabled,
)
.await?;
let req = mock
.last_request()
.expect("structured output request recorded");
let req = mock.last_request().expect("shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
.expect("shell output string");
assert!(
serde_json::from_str::<Value>(output).is_err(),
"expected structured output to be plain text"
"expected shell output to be plain text"
);
let (header, body) = output
.split_once("Output:\n")
.expect("structured output contains an Output section");
.expect("shell output contains an Output section");
assert_regex_match(
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds$",
header.trim_end(),
@@ -180,34 +109,26 @@ async fn shell_output_structures_fixture_with_serialization(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_for_freeform_tool_records_duration(
output_type: ShellModelOutput,
) -> Result<()> {
async fn shell_output_records_duration() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let mut builder = test_codex().with_model("test-gpt-5-codex");
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 0.2"], output_type)?;
let call_id = "shell-freeform";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 0.2"])?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"run the structured shell command",
PermissionProfile::Disabled,
)
.await?;
test.submit_turn_with_permission_profile("run the shell command", PermissionProfile::Disabled)
.await?;
let req = mock
.last_request()
.expect("structured output request recorded");
let req = mock.last_request().expect("shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
.expect("shell output string");
let expected_pattern = r#"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -221,7 +142,7 @@ $"#;
.captures(output)
.and_then(|caps| caps.get(1))
.and_then(|value| value.as_str().parse::<f32>().ok())
.expect("expected structured shell output to contain wall time seconds");
.expect("expected shell output to contain wall time seconds");
assert!(
wall_time_seconds > 0.1,
"expected wall time to be greater than zero seconds, got {wall_time_seconds}"
@@ -231,53 +152,7 @@ $"#;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_output_is_structured(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-structured";
let file_name = "structured.txt";
let patch = format!(
r#"*** Begin Patch
*** Add File: {file_name}
+from custom tool
*** End Patch
"#
);
mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;
harness
.test()
.submit_turn_with_permission_profile(
"apply the patch via custom tool",
PermissionProfile::Disabled,
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output.as_str());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_creates_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -287,7 +162,7 @@ async fn apply_patch_custom_tool_call_creates_file(
let patch = format!(
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
);
mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch done").await;
harness
.test()
@@ -297,7 +172,7 @@ async fn apply_patch_custom_tool_call_creates_file(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -319,10 +194,7 @@ A {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_updates_existing_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -333,14 +205,7 @@ async fn apply_patch_custom_tool_call_updates_existing_file(
let patch = format!(
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
);
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch update done",
output_type,
)
.await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch update done").await;
harness
.test()
@@ -350,7 +215,7 @@ async fn apply_patch_custom_tool_call_updates_existing_file(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -369,10 +234,7 @@ M {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_reports_failure_output(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -382,14 +244,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
let patch = format!(
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
);
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch failure done",
output_type,
)
.await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch failure done").await;
harness
.test()
@@ -399,7 +254,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_output = format!(
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
@@ -411,49 +266,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_tool_output_is_structured(output_type: ApplyPatchModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-function";
let file_name = "freeform_apply_patch.txt";
let patch =
format!("*** Begin Patch\n*** Add File: {file_name}\n+via apply_patch\n*** End Patch\n");
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch function done",
output_type,
)
.await;
harness
.test()
.submit_turn_with_permission_profile(
"apply the patch via freeform apply_patch",
PermissionProfile::Disabled,
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output.as_str());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
async fn shell_output_is_freeform_for_nonzero_exit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -461,7 +274,7 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp
let test = builder.build(&server).await?;
let call_id = "shell-nonzero-exit";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"])?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(