mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
avoid cloning websocket request history (#28313)
## Why

WebSocket continuations only send the new part of a request. Checking whether a request could be continued was cloning the full previous request, the current request, and their input history. For long conversations or large tool lists, that meant copying several request-sized values on every continuation.

## What changed

- compare the request settings by reference
- check the previous input and server response as borrowed prefixes
- allocate only the new input items that will be sent

The reuse rules stay the same, including ignoring `client_metadata` for this check. The comparison is still `O(n)`, but it removes several `O(n)` allocations and copies. Temporary memory no longer grows by multiple full request sizes for each continuation.

## Performance

Local rollout traces show continuation checks on turns around 260k input tokens. Before this change the reuse gate cloned the previous request, the current request, and the previous input history before deciding whether it could continue incrementally. After this change it borrows those structures and allocates only the incremental tail. For large continuations with a small delta, that removes roughly three request-sized copies from the hot path and reduces temporary memory from multiple full request sizes to just the new tail.

## Validation

- `just test -p codex-core responses_websocket_v2_creates_with_previous_response_id_on_prefix`
- `just test -p codex-core responses_websocket_v2_creates_without_previous_response_id_when_non_input_fields_change`
This commit is contained in:
+70
-19
@@ -266,6 +266,60 @@ struct WebsocketSession {
|
||||
connection_reused: StdMutex<bool>,
|
||||
}
|
||||
|
||||
// This is intentionally not a `PartialEq` implementation: request equality includes `input` and
|
||||
// `client_metadata`, while websocket reuse compares the input separately and ignores metadata.
|
||||
// Keep the destructuring exhaustive so new request fields require an explicit reuse decision.
|
||||
fn responses_request_properties_match(
|
||||
previous: &ResponsesApiRequest,
|
||||
current: &ResponsesApiRequest,
|
||||
) -> bool {
|
||||
let ResponsesApiRequest {
|
||||
model: previous_model,
|
||||
instructions: previous_instructions,
|
||||
input: _,
|
||||
tools: previous_tools,
|
||||
tool_choice: previous_tool_choice,
|
||||
parallel_tool_calls: previous_parallel_tool_calls,
|
||||
reasoning: previous_reasoning,
|
||||
store: previous_store,
|
||||
stream: previous_stream,
|
||||
include: previous_include,
|
||||
service_tier: previous_service_tier,
|
||||
prompt_cache_key: previous_prompt_cache_key,
|
||||
text: previous_text,
|
||||
client_metadata: _,
|
||||
} = previous;
|
||||
let ResponsesApiRequest {
|
||||
model: current_model,
|
||||
instructions: current_instructions,
|
||||
input: _,
|
||||
tools: current_tools,
|
||||
tool_choice: current_tool_choice,
|
||||
parallel_tool_calls: current_parallel_tool_calls,
|
||||
reasoning: current_reasoning,
|
||||
store: current_store,
|
||||
stream: current_stream,
|
||||
include: current_include,
|
||||
service_tier: current_service_tier,
|
||||
prompt_cache_key: current_prompt_cache_key,
|
||||
text: current_text,
|
||||
client_metadata: _,
|
||||
} = current;
|
||||
|
||||
previous_model == current_model
|
||||
&& previous_instructions == current_instructions
|
||||
&& previous_tools == current_tools
|
||||
&& previous_tool_choice == current_tool_choice
|
||||
&& previous_parallel_tool_calls == current_parallel_tool_calls
|
||||
&& previous_reasoning == current_reasoning
|
||||
&& previous_store == current_store
|
||||
&& previous_stream == current_stream
|
||||
&& previous_include == current_include
|
||||
&& previous_service_tier == current_service_tier
|
||||
&& previous_prompt_cache_key == current_prompt_cache_key
|
||||
&& previous_text == current_text
|
||||
}
|
||||
|
||||
impl WebsocketSession {
|
||||
fn set_connection_reused(&self, connection_reused: bool) {
|
||||
*self
|
||||
@@ -991,31 +1045,28 @@ impl ModelClientSession {
|
||||
// extension of the previous known input. Server-returned output items are treated as part
|
||||
// of the baseline so we do not resend them.
|
||||
let previous_request = self.websocket_session.last_request.as_ref()?;
|
||||
let mut previous_without_input = previous_request.clone();
|
||||
previous_without_input.input.clear();
|
||||
previous_without_input.client_metadata = None;
|
||||
let mut request_without_input = request.clone();
|
||||
request_without_input.input.clear();
|
||||
request_without_input.client_metadata = None;
|
||||
if previous_without_input != request_without_input {
|
||||
if !responses_request_properties_match(previous_request, request) {
|
||||
trace!("incremental request failed, websocket reuse properties didn't match");
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut baseline = previous_request.input.clone();
|
||||
if let Some(last_response) = last_response {
|
||||
baseline.extend(last_response.items_added.clone());
|
||||
}
|
||||
|
||||
let baseline_len = baseline.len();
|
||||
if request.input.starts_with(&baseline)
|
||||
&& (allow_empty_delta || baseline_len < request.input.len())
|
||||
{
|
||||
Some(request.input[baseline_len..].to_vec())
|
||||
} else {
|
||||
let Some(after_previous_input) = request
|
||||
.input
|
||||
.strip_prefix(previous_request.input.as_slice())
|
||||
else {
|
||||
trace!("incremental request failed, items didn't match");
|
||||
None
|
||||
return None;
|
||||
};
|
||||
let response_items =
|
||||
last_response.map_or(&[][..], |response| response.items_added.as_slice());
|
||||
let Some(incremental_items) = after_previous_input.strip_prefix(response_items) else {
|
||||
trace!("incremental request failed, items didn't match");
|
||||
return None;
|
||||
};
|
||||
if !allow_empty_delta && incremental_items.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(incremental_items.to_vec())
|
||||
}
|
||||
|
||||
fn get_last_response(&mut self) -> Option<LastResponse> {
|
||||
|
||||
Reference in New Issue
Block a user