mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
33cc928d33
When using Responses Lite, we should always use `additional_tools` and a developer item instead of the top-level `tools` array and `instructions` field. This keeps things 1-to-1. Forced namespacing for _all_ tools will land in a follow-up PR after some coordination and fixes in the Responses API (around collisions and return items). The goal is to eventually expand the scope of this to _all_ requests from Codex, but that will require broader coordination across providers and a slower rollout.
529 lines
18 KiB
Rust
529 lines
18 KiB
Rust
use anyhow::Result;
|
|
use codex_features::Feature;
|
|
use codex_protocol::config_types::ServiceTier;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::protocol::ThreadSettingsOverrides;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::responses::WebSocketConnectionConfig;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::ev_shell_command_call;
|
|
use core_test_support::responses::start_websocket_server;
|
|
use core_test_support::responses::start_websocket_server_with_headers;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use pretty_assertions::assert_eq;
|
|
use serde_json::Value;
|
|
use std::time::Duration;
|
|
|
|
/// Value the `openai-beta` handshake header is expected to carry when the
/// `ResponsesWebsocketsV2` feature is enabled.
const WS_V2_BETA_HEADER_VALUE: &str = "responses_websockets=2026-02-06";
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_model_switch_to_responses_lite_omits_top_level_tools() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![ev_response_created("resp-1"), ev_completed("resp-1")],
|
|
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex()
|
|
.with_model_info_override("gpt-5.4", |model_info| {
|
|
model_info.use_responses_lite = true;
|
|
})
|
|
.with_model("gpt-5.3-codex");
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
|
|
test.submit_turn("non-lite turn").await?;
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
items: vec![UserInput::Text {
|
|
text: "lite turn".into(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: Default::default(),
|
|
thread_settings: ThreadSettingsOverrides {
|
|
model: Some("gpt-5.4".to_string()),
|
|
..Default::default()
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 3);
|
|
let non_lite_turn = connection
|
|
.get(1)
|
|
.expect("missing non-lite turn request")
|
|
.body_json();
|
|
let lite_turn = connection
|
|
.get(2)
|
|
.expect("missing lite turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(non_lite_turn["model"].as_str(), Some("gpt-5.3-codex"));
|
|
assert_eq!(lite_turn["model"].as_str(), Some("gpt-5.4"));
|
|
assert!(
|
|
non_lite_turn
|
|
.get("tools")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|tools| !tools.is_empty())
|
|
);
|
|
assert_eq!(lite_turn.get("previous_response_id"), None);
|
|
assert_eq!(lite_turn.get("tools"), None);
|
|
assert_eq!(lite_turn.get("instructions"), None);
|
|
let additional_tools = lite_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.and_then(|input| input.first())
|
|
.filter(|item| item.get("type").and_then(Value::as_str) == Some("additional_tools"))
|
|
.and_then(|item| item.get("tools"))
|
|
.and_then(Value::as_array)
|
|
.expect("lite turn should start with an additional_tools item");
|
|
assert!(!additional_tools.is_empty());
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_test_codex_shell_chain() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let call_id = "shell-command-call";
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_shell_command_call(call_id, "echo websocket"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
vec![
|
|
ev_response_created("resp-2"),
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex().with_windows_cmd_shell();
|
|
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
test.submit_turn_with_policy("run the echo command", test.config.legacy_sandbox_policy())
|
|
.await?;
|
|
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 2);
|
|
|
|
let first_turn = connection
|
|
.first()
|
|
.expect("missing first turn request")
|
|
.body_json();
|
|
let second_turn = connection
|
|
.get(1)
|
|
.expect("missing second turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(first_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(second_turn["type"].as_str(), Some("response.create"));
|
|
|
|
let input_items = second_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.expect("second response.create input array");
|
|
assert!(!input_items.is_empty());
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_first_turn_uses_startup_prewarm_and_create() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_assistant_message("msg-1", "hello"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex();
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
test.submit_turn_with_policy("hello", test.config.legacy_sandbox_policy())
|
|
.await?;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 2);
|
|
let warmup = connection
|
|
.first()
|
|
.expect("missing warmup request")
|
|
.body_json();
|
|
let turn = connection.get(1).expect("missing turn request").body_json();
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
let warmup_metadata: Value = serde_json::from_str(
|
|
warmup["client_metadata"]["x-codex-turn-metadata"]
|
|
.as_str()
|
|
.expect("warmup turn metadata"),
|
|
)?;
|
|
assert_eq!(warmup_metadata["request_kind"].as_str(), Some("prewarm"));
|
|
assert_eq!(
|
|
warmup_metadata["window_id"].as_str(),
|
|
warmup["client_metadata"]["x-codex-window-id"].as_str()
|
|
);
|
|
assert!(
|
|
turn["tools"]
|
|
.as_array()
|
|
.is_some_and(|tools| !tools.is_empty()),
|
|
"expected request tools to be populated"
|
|
);
|
|
assert_eq!(turn["type"].as_str(), Some("response.create"));
|
|
let turn_metadata: Value = serde_json::from_str(
|
|
turn["client_metadata"]["x-codex-turn-metadata"]
|
|
.as_str()
|
|
.expect("turn metadata"),
|
|
)?;
|
|
assert_eq!(turn_metadata["request_kind"].as_str(), Some("turn"));
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_first_turn_handles_handshake_delay_with_startup_prewarm() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
|
|
requests: vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_assistant_message("msg-1", "hello"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
],
|
|
response_headers: Vec::new(),
|
|
// Delay handshake so turn processing must tolerate websocket startup latency.
|
|
accept_delay: Some(Duration::from_millis(150)),
|
|
close_after_requests: true,
|
|
}])
|
|
.await;
|
|
|
|
let mut builder = test_codex();
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
test.submit_turn_with_policy("hello", test.config.legacy_sandbox_policy())
|
|
.await?;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 2);
|
|
let warmup = connection
|
|
.first()
|
|
.expect("missing warmup request")
|
|
.body_json();
|
|
let turn = connection.get(1).expect("missing turn request").body_json();
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
assert!(
|
|
turn["tools"]
|
|
.as_array()
|
|
.is_some_and(|tools| !tools.is_empty()),
|
|
"expected request tools to be populated"
|
|
);
|
|
assert_eq!(turn["type"].as_str(), Some("response.create"));
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_v2_test_codex_shell_chain() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let call_id = "shell-command-call";
|
|
let mut shell_command_call = ev_shell_command_call(call_id, "echo websocket");
|
|
shell_command_call["item"]["internal_chat_message_metadata_passthrough"] =
|
|
serde_json::json!({"turn_id": "turn-123"});
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
shell_command_call,
|
|
ev_completed("resp-1"),
|
|
],
|
|
vec![
|
|
ev_response_created("resp-2"),
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex().with_windows_cmd_shell().with_config(|config| {
|
|
config
|
|
.features
|
|
.enable(Feature::ResponsesWebsocketsV2)
|
|
.expect("test config should allow feature update");
|
|
});
|
|
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
test.submit_turn_with_policy("run the echo command", test.config.legacy_sandbox_policy())
|
|
.await?;
|
|
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 3);
|
|
|
|
let warmup = connection
|
|
.first()
|
|
.expect("missing warmup request")
|
|
.body_json();
|
|
let first_turn = connection
|
|
.get(1)
|
|
.expect("missing first turn request")
|
|
.body_json();
|
|
let second_turn = connection
|
|
.get(2)
|
|
.expect("missing second turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
assert_eq!(first_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(first_turn["previous_response_id"].as_str(), Some("warm-1"));
|
|
assert!(
|
|
first_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|items| !items.is_empty())
|
|
);
|
|
assert_eq!(second_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(second_turn["previous_response_id"].as_str(), Some("resp-1"));
|
|
|
|
let create_items = second_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.expect("response.create input array");
|
|
assert!(!create_items.is_empty());
|
|
|
|
let output_item = create_items
|
|
.iter()
|
|
.find(|item| item.get("type").and_then(Value::as_str) == Some("function_call_output"))
|
|
.expect("function_call_output in create");
|
|
assert_eq!(
|
|
output_item.get("call_id").and_then(Value::as_str),
|
|
Some(call_id)
|
|
);
|
|
|
|
let handshake = server.single_handshake();
|
|
assert_eq!(
|
|
handshake.header("openai-beta"),
|
|
Some(WS_V2_BETA_HEADER_VALUE.to_string())
|
|
);
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_v2_first_turn_uses_updated_fast_tier_after_startup_prewarm() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_assistant_message("msg-1", "fast"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex().with_config(|config| {
|
|
config
|
|
.features
|
|
.enable(Feature::ResponsesWebsocketsV2)
|
|
.expect("test config should allow feature update");
|
|
});
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
|
|
let warmup = server
|
|
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 0)
|
|
.await
|
|
.body_json();
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
assert_eq!(warmup.get("service_tier"), None);
|
|
|
|
test.submit_turn_with_service_tier("hello", Some(ServiceTier::Fast.request_value()))
|
|
.await?;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 2);
|
|
let first_turn = connection
|
|
.get(1)
|
|
.expect("missing first turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(first_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(first_turn["service_tier"].as_str(), Some("priority"));
|
|
assert_eq!(first_turn.get("previous_response_id"), None);
|
|
assert!(
|
|
first_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|items| !items.is_empty())
|
|
);
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_v2_first_turn_drops_fast_tier_after_startup_prewarm() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_assistant_message("msg-1", "standard"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex().with_config(|config| {
|
|
config
|
|
.features
|
|
.enable(Feature::ResponsesWebsocketsV2)
|
|
.expect("test config should allow feature update");
|
|
config.service_tier = Some(ServiceTier::Fast.request_value().to_string());
|
|
});
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
|
|
let warmup = server
|
|
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 0)
|
|
.await
|
|
.body_json();
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
assert_eq!(warmup["service_tier"].as_str(), Some("priority"));
|
|
|
|
test.submit_turn_with_service_tier("hello", /*service_tier*/ None)
|
|
.await?;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 2);
|
|
let first_turn = connection
|
|
.get(1)
|
|
.expect("missing first turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(first_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(first_turn.get("service_tier"), None);
|
|
assert_eq!(first_turn.get("previous_response_id"), None);
|
|
assert!(
|
|
first_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|items| !items.is_empty())
|
|
);
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn websocket_v2_next_turn_uses_updated_service_tier() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_websocket_server(vec![vec![
|
|
vec![ev_response_created("warm-1"), ev_completed("warm-1")],
|
|
vec![
|
|
ev_response_created("resp-1"),
|
|
ev_assistant_message("msg-1", "fast"),
|
|
ev_completed("resp-1"),
|
|
],
|
|
vec![
|
|
ev_response_created("resp-2"),
|
|
ev_assistant_message("msg-2", "standard"),
|
|
ev_completed("resp-2"),
|
|
],
|
|
]])
|
|
.await;
|
|
|
|
let mut builder = test_codex().with_config(|config| {
|
|
config
|
|
.features
|
|
.enable(Feature::ResponsesWebsocketsV2)
|
|
.expect("test config should allow feature update");
|
|
});
|
|
let test = builder.build_with_websocket_server(&server).await?;
|
|
|
|
let warmup = server
|
|
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 0)
|
|
.await
|
|
.body_json();
|
|
assert_eq!(warmup["type"].as_str(), Some("response.create"));
|
|
assert_eq!(warmup["generate"].as_bool(), Some(false));
|
|
assert_eq!(warmup.get("service_tier"), None);
|
|
|
|
test.submit_turn_with_service_tier("first", Some(ServiceTier::Fast.request_value()))
|
|
.await?;
|
|
test.submit_turn_with_service_tier("second", /*service_tier*/ None)
|
|
.await?;
|
|
|
|
assert_eq!(server.handshakes().len(), 1);
|
|
let connection = server.single_connection();
|
|
assert_eq!(connection.len(), 3);
|
|
|
|
let first_turn = connection
|
|
.get(1)
|
|
.expect("missing first turn request")
|
|
.body_json();
|
|
let second_turn = connection
|
|
.get(2)
|
|
.expect("missing second turn request")
|
|
.body_json();
|
|
|
|
assert_eq!(first_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(first_turn["service_tier"].as_str(), Some("priority"));
|
|
assert_eq!(first_turn.get("previous_response_id"), None);
|
|
assert!(
|
|
first_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|items| !items.is_empty())
|
|
);
|
|
|
|
assert_eq!(second_turn["type"].as_str(), Some("response.create"));
|
|
assert_eq!(second_turn.get("service_tier"), None);
|
|
assert_eq!(second_turn.get("previous_response_id"), None);
|
|
assert!(
|
|
second_turn
|
|
.get("input")
|
|
.and_then(Value::as_array)
|
|
.is_some_and(|items| !items.is_empty())
|
|
);
|
|
|
|
server.shutdown().await;
|
|
Ok(())
|
|
}
|