avoid cloning websocket request history (#28313)

## Why

WebSocket continuations only send the new part of a request. Checking
whether a request could be continued was cloning the full previous
request, the current request, and their input history.

For long conversations or large tool lists, that meant copying several
request-sized values on every continuation.

## What changed

- compare the request settings by reference
- check the previous input and server response as borrowed prefixes
- allocate only the new input items that will be sent

The reuse rules stay the same, including ignoring `client_metadata` for
this check.

The comparison itself is still `O(n)`, but this change removes several
`O(n)` allocations and copies. Temporary memory no longer grows by multiple
full request sizes for each continuation.

## Performance

Local rollout traces show continuation checks running on turns of around
260k input tokens. Before this change, the reuse gate cloned the previous
request, the current request, and the previous input history before deciding
whether it could continue incrementally. After this change, it borrows
those structures and allocates only the incremental tail. For large
continuations with a small delta, that removes roughly three
request-sized copies from the hot path and reduces temporary memory from
multiple full request sizes to just the new tail.

## Validation

- `just test -p codex-core
responses_websocket_v2_creates_with_previous_response_id_on_prefix`
- `just test -p codex-core
responses_websocket_v2_creates_without_previous_response_id_when_non_input_fields_change`
This commit is contained in:
jif
2026-06-15 17:48:47 +01:00
committed by GitHub
Unverified
parent baddb5e686
commit 95765542c9
+70 -19
View File
@@ -266,6 +266,60 @@ struct WebsocketSession {
connection_reused: StdMutex<bool>,
}
/// Reports whether two requests agree on every setting that matters for websocket reuse.
///
/// This deliberately stands apart from a `PartialEq` implementation: request equality also
/// covers `input` and `client_metadata`, whereas websocket reuse compares the input separately
/// and ignores metadata. Both patterns below name every field (no `..`), so a field added to
/// `ResponsesApiRequest` must be given an explicit reuse decision here.
fn responses_request_properties_match(
    previous: &ResponsesApiRequest,
    current: &ResponsesApiRequest,
) -> bool {
    let ResponsesApiRequest {
        model: old_model,
        instructions: old_instructions,
        tools: old_tools,
        tool_choice: old_tool_choice,
        parallel_tool_calls: old_parallel_tool_calls,
        reasoning: old_reasoning,
        store: old_store,
        stream: old_stream,
        include: old_include,
        service_tier: old_service_tier,
        prompt_cache_key: old_prompt_cache_key,
        text: old_text,
        // Deliberately excluded from the reuse comparison.
        input: _,
        client_metadata: _,
    } = previous;

    let ResponsesApiRequest {
        model: new_model,
        instructions: new_instructions,
        tools: new_tools,
        tool_choice: new_tool_choice,
        parallel_tool_calls: new_parallel_tool_calls,
        reasoning: new_reasoning,
        store: new_store,
        stream: new_stream,
        include: new_include,
        service_tier: new_service_tier,
        prompt_cache_key: new_prompt_cache_key,
        text: new_text,
        // Deliberately excluded from the reuse comparison.
        input: _,
        client_metadata: _,
    } = current;

    // Field-by-field comparison through the borrowed bindings; the chain stops at the first
    // mismatch and nothing is cloned.
    old_model == new_model
        && old_instructions == new_instructions
        && old_tools == new_tools
        && old_tool_choice == new_tool_choice
        && old_parallel_tool_calls == new_parallel_tool_calls
        && old_reasoning == new_reasoning
        && old_store == new_store
        && old_stream == new_stream
        && old_include == new_include
        && old_service_tier == new_service_tier
        && old_prompt_cache_key == new_prompt_cache_key
        && old_text == new_text
}
impl WebsocketSession {
fn set_connection_reused(&self, connection_reused: bool) {
*self
@@ -991,31 +1045,28 @@ impl ModelClientSession {
// extension of the previous known input. Server-returned output items are treated as part
// of the baseline so we do not resend them.
let previous_request = self.websocket_session.last_request.as_ref()?;
let mut previous_without_input = previous_request.clone();
previous_without_input.input.clear();
previous_without_input.client_metadata = None;
let mut request_without_input = request.clone();
request_without_input.input.clear();
request_without_input.client_metadata = None;
if previous_without_input != request_without_input {
if !responses_request_properties_match(previous_request, request) {
trace!("incremental request failed, websocket reuse properties didn't match");
return None;
}
let mut baseline = previous_request.input.clone();
if let Some(last_response) = last_response {
baseline.extend(last_response.items_added.clone());
}
let baseline_len = baseline.len();
if request.input.starts_with(&baseline)
&& (allow_empty_delta || baseline_len < request.input.len())
{
Some(request.input[baseline_len..].to_vec())
} else {
let Some(after_previous_input) = request
.input
.strip_prefix(previous_request.input.as_slice())
else {
trace!("incremental request failed, items didn't match");
None
return None;
};
let response_items =
last_response.map_or(&[][..], |response| response.items_added.as_slice());
let Some(incremental_items) = after_previous_input.strip_prefix(response_items) else {
trace!("incremental request failed, items didn't match");
return None;
};
if !allow_empty_delta && incremental_items.is_empty() {
return None;
}
Some(incremental_items.to_vec())
}
fn get_last_response(&mut self) -> Option<LastResponse> {