From a22706dfae2cedfe6056be800ff3d531468e999a Mon Sep 17 00:00:00 2001 From: sayan-oai Date: Tue, 26 May 2026 11:12:24 -0700 Subject: [PATCH] standalone websearch extension (#23823) ## Summary Add the extension-backed standalone `web.run` tool so Codex can call the standalone search endpoint through the `codex-api` search client and return its encrypted output to Responses. - gate the new tool behind `standalone_web_search` - install the extension in the app-server thread registry and hide hosted `web_search` when standalone search is enabled for OpenAI providers so the two paths stay mutually exclusive - build search context from persisted history using a small tail heuristic: previous user message, assistant text between the last two user turns capped at about 1k tokens, and current user message ## Test Plan - `cargo test -p codex-web-search-extension` - `cargo test -p codex-api` - `cargo test -p codex-core hosted_tools_follow_provider_auth_model_and_config_gates` --- codex-rs/Cargo.lock | 22 ++ codex-rs/Cargo.toml | 2 + codex-rs/app-server/Cargo.toml | 1 + codex-rs/app-server/src/extensions.rs | 3 + codex-rs/app-server/src/mcp_refresh.rs | 5 +- codex-rs/app-server/src/message_processor.rs | 1 + codex-rs/app-server/tests/suite/v2/mod.rs | 1 + .../app-server/tests/suite/v2/web_search.rs | 216 ++++++++++++++++++ codex-rs/codex-api/Cargo.toml | 1 + codex-rs/codex-api/src/search.rs | 69 ++++-- codex-rs/core/config.schema.json | 6 + codex-rs/core/src/tools/spec_plan.rs | 18 +- codex-rs/core/src/tools/spec_plan_tests.rs | 57 ++++- codex-rs/ext/web-search/BUILD.bazel | 9 + codex-rs/ext/web-search/Cargo.toml | 31 +++ codex-rs/ext/web-search/src/extension.rs | 173 ++++++++++++++ codex-rs/ext/web-search/src/history.rs | 170 ++++++++++++++ codex-rs/ext/web-search/src/lib.rs | 7 + codex-rs/ext/web-search/src/output.rs | 72 ++++++ codex-rs/ext/web-search/src/schema.rs | 36 +++ codex-rs/ext/web-search/src/tool.rs | 113 +++++++++ .../ext/web-search/web_run_description.md | 80 +++++++ codex-rs/features/src/lib.rs | 8 + codex-rs/protocol/src/models.rs | 7 + codex-rs/tools/src/lib.rs | 3 + codex-rs/tools/src/response_history.rs | 149 ++++++++++++ 26 files changed, 1238 insertions(+), 22 deletions(-) create mode 100644 codex-rs/app-server/tests/suite/v2/web_search.rs create mode 100644 codex-rs/ext/web-search/BUILD.bazel create mode 100644 codex-rs/ext/web-search/Cargo.toml create mode 100644 codex-rs/ext/web-search/src/extension.rs create mode 100644 codex-rs/ext/web-search/src/history.rs create mode 100644 codex-rs/ext/web-search/src/lib.rs create mode 100644 codex-rs/ext/web-search/src/output.rs create mode 100644 codex-rs/ext/web-search/src/schema.rs create mode 100644 codex-rs/ext/web-search/src/tool.rs create mode 100644 codex-rs/ext/web-search/web_run_description.md create mode 100644 codex-rs/tools/src/response_history.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 78acd3478..a1884ba9e 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1859,6 +1859,7 @@ dependencies = [ "pretty_assertions", "regex-lite", "reqwest", + "schemars 0.8.22", "serde", "serde_json", "tempfile", @@ -1927,6 +1928,7 @@ dependencies = [ "codex-utils-cli", "codex-utils-json-to-toml", "codex-utils-pty", + "codex-web-search-extension", "core_test_support", "flate2", "futures", @@ -4117,6 +4119,26 @@ dependencies = [ "v8", ] +[[package]] +name = "codex-web-search-extension" +version = "0.0.0" +dependencies = [ + "async-trait", + "codex-api", + "codex-core", + "codex-extension-api", + "codex-features", + "codex-login", + "codex-model-provider", + "codex-model-provider-info", + "codex-protocol", + "codex-tools", + "http 1.4.0", + "pretty_assertions", + "schemars 0.8.22", + "serde_json", +] + [[package]] name = "codex-windows-sandbox" version = "0.0.0" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 830e24408..6c9737e4a 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -48,6 +48,7 @@ members = [ "ext/goal", "ext/guardian", "ext/memories", + "ext/web-search", "external-agent-migration", "external-agent-sessions", "keyring-store", @@ -180,6 +181,7 @@ codex-lmstudio = { path = "lmstudio" } codex-login = { path = "login" } codex-message-history = { path = "message-history" } codex-memories-extension = { path = "ext/memories" } +codex-web-search-extension = { path = "ext/web-search" } codex-memories-read = { path = "memories/read" } codex-memories-write = { path = "memories/write" } codex-mcp = { path = "codex-mcp" } diff --git a/codex-rs/app-server/Cargo.toml b/codex-rs/app-server/Cargo.toml index 95baac4e9..17ecd7dc3 100644 --- a/codex-rs/app-server/Cargo.toml +++ b/codex-rs/app-server/Cargo.toml @@ -55,6 +55,7 @@ codex-file-search = { workspace = true } codex-chatgpt = { workspace = true } codex-login = { workspace = true } codex-memories-extension = { workspace = true } +codex-web-search-extension = { workspace = true } codex-memories-write = { workspace = true } codex-mcp = { workspace = true } codex-model-provider = { workspace = true } diff --git a/codex-rs/app-server/src/extensions.rs b/codex-rs/app-server/src/extensions.rs index 9232cb758..1246da7b3 100644 --- a/codex-rs/app-server/src/extensions.rs +++ b/codex-rs/app-server/src/extensions.rs @@ -12,6 +12,7 @@ use codex_extension_api::AgentSpawner; use codex_extension_api::ExtensionEventSink; use codex_extension_api::ExtensionRegistry; use codex_extension_api::ExtensionRegistryBuilder; +use codex_login::AuthManager; use codex_protocol::ThreadId; use codex_protocol::error::CodexErr; use codex_protocol::protocol::Event; @@ -22,6 +23,7 @@ use crate::outgoing_message::OutgoingMessageSender; pub(crate) fn thread_extensions( guardian_agent_spawner: S, event_sink: Arc, + auth_manager: Arc, ) -> Arc> where S: AgentSpawner + 'static, @@ -29,6 +31,7 @@ where let mut builder = ExtensionRegistryBuilder::::with_event_sink(event_sink); codex_guardian::install(&mut builder, guardian_agent_spawner); codex_memories_extension::install(&mut builder, codex_otel::global()); + codex_web_search_extension::install(&mut builder, auth_manager); Arc::new(builder.build()) } diff --git a/codex-rs/app-server/src/mcp_refresh.rs b/codex-rs/app-server/src/mcp_refresh.rs index d65b1d83e..4ddfa3473 100644 --- a/codex-rs/app-server/src/mcp_refresh.rs +++ b/codex-rs/app-server/src/mcp_refresh.rs @@ -184,15 +184,16 @@ mod tests { let thread_manager = Arc::new_cyclic(|thread_manager| { ThreadManager::new( &good_config, - auth_manager, + auth_manager.clone(), SessionSource::Exec, Arc::new(EnvironmentManager::default_for_tests()), thread_extensions( guardian_agent_spawner(thread_manager.clone()), Arc::new(NoopExtensionEventSink), + auth_manager.clone(), ), /*analytics_events_client*/ None, - thread_store, + Arc::clone(&thread_store), Some(state_db.clone()), "11111111-1111-4111-8111-111111111111".to_string(), /*attestation_provider*/ None, diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 2d0e8d085..33b9c56d4 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -314,6 +314,7 @@ impl MessageProcessor { thread_extensions( guardian_agent_spawner(thread_manager.clone()), app_server_extension_event_sink(outgoing.clone()), + auth_manager.clone(), ), Some(analytics_events_client.clone()), Arc::clone(&thread_store), diff --git a/codex-rs/app-server/tests/suite/v2/mod.rs b/codex-rs/app-server/tests/suite/v2/mod.rs index 92e0f126f..160598370 100644 --- a/codex-rs/app-server/tests/suite/v2/mod.rs +++ b/codex-rs/app-server/tests/suite/v2/mod.rs @@ -68,4 +68,5 @@ mod turn_interrupt; mod turn_start; mod turn_start_zsh_fork; mod turn_steer; +mod web_search; mod windows_sandbox_setup; diff --git a/codex-rs/app-server/tests/suite/v2/web_search.rs b/codex-rs/app-server/tests/suite/v2/web_search.rs new file mode 100644 index 000000000..52c327a93 --- /dev/null +++ b/codex-rs/app-server/tests/suite/v2/web_search.rs @@ -0,0 +1,216 @@ +use std::path::Path; +use std::time::Duration; + +use anyhow::Context; +use anyhow::Result; +use app_test_support::ChatGptAuthFixture; +use app_test_support::McpProcess; +use app_test_support::to_response; +use app_test_support::write_chatgpt_auth; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::ThreadStartParams; +use codex_app_server_protocol::ThreadStartResponse; +use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::TurnStartResponse; +use codex_app_server_protocol::UserInput as V2UserInput; +use codex_config::types::AuthCredentialsStoreMode; +use core_test_support::responses; +use pretty_assertions::assert_eq; +use serde_json::Value; +use serde_json::json; +use tempfile::TempDir; +use tokio::time::timeout; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +// macOS and Windows Bazel CI can spend tens of seconds starting app-server +// subprocesses or processing test RPCs under load. +#[cfg(any(target_os = "macos", windows))] +const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] +const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10); + +#[tokio::test] +async fn standalone_web_search_round_trips_encrypted_output() -> Result<()> { + let call_id = "web-run-1"; + let server = responses::start_mock_server().await; + mount_search_response(&server).await; + + let response_mock = responses::mount_sse_sequence( + &server, + vec![ + responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call_with_namespace( + call_id, + "web", + "run", + &json!({ + "search_query": [{"q": "standalone web search"}], + }) + .to_string(), + ), + responses::ev_completed("resp-1"), + ]), + responses::sse(vec![ + responses::ev_assistant_message("msg-1", "Done"), + responses::ev_completed("resp-2"), + ]), + ], + ) + .await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + write_chatgpt_auth( + codex_home.path(), + ChatGptAuthFixture::new("access-chatgpt"), + AuthCredentialsStoreMode::File, + )?; + + let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams::default()) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id, + input: vec![V2UserInput::Text { + text: "Search the web".to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let _turn: TurnStartResponse = to_response::(turn_resp)?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let requests = response_mock.requests(); + assert_eq!(requests.len(), 2); + + let first_response = requests[0].body_json(); + assert!( + requests[0].tool_by_name("web", "run").is_some(), + "web.run should be sent to the model" + ); + assert!( + !has_hosted_web_search(&first_response), + "standalone web search should replace hosted web search" + ); + + let search_body = search_request_body(&server).await?; + assert_eq!( + search_body["commands"], + json!({ + "search_query": [{"q": "standalone web search"}], + }) + ); + assert_eq!( + search_body["input"] + .as_array() + .context("search input should be an array")? + .last(), + Some(&json!({ + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "Search the web"}], + })) + ); + + assert_eq!( + requests[1].function_call_output(call_id), + json!({ + "type": "function_call_output", + "call_id": call_id, + "output": [{ + "type": "encrypted_content", + "encrypted_content": "ciphertext", + }], + }) + ); + + Ok(()) +} + +async fn mount_search_response(server: &MockServer) { + Mock::given(method("POST")) + .and(path("/api/codex/alpha/search")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "encrypted_output": "ciphertext", + }))) + .expect(1) + .mount(server) + .await; +} + +fn has_hosted_web_search(body: &Value) -> bool { + body.get("tools") + .and_then(Value::as_array) + .is_some_and(|tools| { + tools + .iter() + .any(|tool| tool.get("type").and_then(Value::as_str) == Some("web_search")) + }) +} + +async fn search_request_body(server: &MockServer) -> Result { + server + .received_requests() + .await + .context("failed to fetch received requests")? + .into_iter() + .find(|request| request.url.path() == "/api/codex/alpha/search") + .context("expected standalone search request")? + .body_json() + .context("search request body should be JSON") +} + +fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { + std::fs::write( + codex_home.join("config.toml"), + format!( + r#" +model = "mock-model" +approval_policy = "never" +sandbox_mode = "read-only" +model_provider = "openai-custom" +chatgpt_base_url = "{server_uri}" + +[features] +standalone_web_search = true + +[model_providers.openai-custom] +name = "OpenAI" +base_url = "{server_uri}/api/codex" +wire_api = "responses" +request_max_retries = 0 +stream_max_retries = 0 +supports_websockets = false +requires_openai_auth = true +"# + ), + ) +} diff --git a/codex-rs/codex-api/Cargo.toml b/codex-rs/codex-api/Cargo.toml index 08f70cf33..07d855725 100644 --- a/codex-rs/codex-api/Cargo.toml +++ b/codex-rs/codex-api/Cargo.toml @@ -16,6 +16,7 @@ codex-utils-rustls-provider = { workspace = true } futures = { workspace = true } http = { workspace = true } reqwest = { workspace = true, features = ["json", "stream"] } +schemars = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/codex-rs/codex-api/src/search.rs b/codex-rs/codex-api/src/search.rs index b841d06a3..8328415f8 100644 --- a/codex-rs/codex-api/src/search.rs +++ b/codex-rs/codex-api/src/search.rs @@ -1,5 +1,6 @@ use crate::common::Reasoning; use codex_protocol::models::ResponseItem; +use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -27,75 +28,100 @@ pub enum SearchInput { Items(Vec), } -#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, JsonSchema)] pub struct SearchCommands { + /// Query the internet search engine for a given list of queries. #[serde(skip_serializing_if = "Option::is_none")] pub search_query: Option>, + /// Query the image search engine for a given list of queries. #[serde(skip_serializing_if = "Option::is_none")] pub image_query: Option>, + /// Open pages by reference id or URL. #[serde(skip_serializing_if = "Option::is_none")] pub open: Option>, + /// Open links from previously opened pages. #[serde(skip_serializing_if = "Option::is_none")] pub click: Option>, + /// Find text patterns in pages. #[serde(skip_serializing_if = "Option::is_none")] pub find: Option>, + /// Take screenshots of PDF pages. #[serde(skip_serializing_if = "Option::is_none")] pub screenshot: Option>, + /// Look up prices for the given stock symbols. #[serde(skip_serializing_if = "Option::is_none")] pub finance: Option>, + /// Look up weather forecasts. #[serde(skip_serializing_if = "Option::is_none")] pub weather: Option>, + /// Look up sports schedules and standings. #[serde(skip_serializing_if = "Option::is_none")] pub sports: Option>, + /// Get time for the given UTC offsets. #[serde(skip_serializing_if = "Option::is_none")] pub time: Option>, + /// Set the length of the response to be returned. #[serde(skip_serializing_if = "Option::is_none")] pub response_length: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct SearchQuery { + /// Search query. pub q: String, + /// Whether to filter by recency, as a number of recent days. #[serde(skip_serializing_if = "Option::is_none")] pub recency: Option, + /// Whether to filter by a specific list of domains. #[serde(skip_serializing_if = "Option::is_none")] pub domains: Option>, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct OpenOperation { + /// Reference id or URL to open. pub ref_id: String, + /// Line number to position the page at. #[serde(skip_serializing_if = "Option::is_none")] pub lineno: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct ClickOperation { + /// Reference id containing the numbered link. pub ref_id: String, + /// Numbered link id to open. pub id: u64, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct FindOperation { + /// Reference id or URL to search within. pub ref_id: String, + /// Text pattern to find. pub pattern: String, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct ScreenshotOperation { + /// Reference id or URL to screenshot. pub ref_id: String, + /// Zero-indexed PDF page number. pub pageno: u64, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct FinanceOperation { + /// Ticker symbol to look up. pub ticker: String, + /// Asset type to look up. pub r#type: FinanceAssetType, + /// ISO 3166-1 alpha-3 country code, "OTC", or "" for cryptocurrency. #[serde(skip_serializing_if = "Option::is_none")] pub market: Option, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum FinanceAssetType { Equity, @@ -104,49 +130,61 @@ pub enum FinanceAssetType { Index, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct WeatherOperation { + /// Location in "Country, Area, City" format. pub location: String, + /// Start date in YYYY-MM-DD format. Defaults to today. #[serde(skip_serializing_if = "Option::is_none")] pub start: Option, + /// Number of days to return. Defaults to 7. #[serde(skip_serializing_if = "Option::is_none")] pub duration: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct SportsOperation { + /// Tool name for sports requests. #[serde(skip_serializing_if = "Option::is_none")] pub tool: Option, + /// Sports function to call. pub r#fn: SportsFunction, + /// League to look up. pub league: SportsLeague, + /// Team to look up, using the common 3 or 4 letter alias used in broadcasts. #[serde(skip_serializing_if = "Option::is_none")] pub team: Option, + /// Opponent to use with `team` when narrowing the lookup. #[serde(skip_serializing_if = "Option::is_none")] pub opponent: Option, + /// Start date in YYYY-MM-DD format. #[serde(skip_serializing_if = "Option::is_none")] pub date_from: Option, + /// End date in YYYY-MM-DD format. #[serde(skip_serializing_if = "Option::is_none")] pub date_to: Option, + /// Number of games to return. #[serde(skip_serializing_if = "Option::is_none")] pub num_games: Option, + /// Locale for the lookup. #[serde(skip_serializing_if = "Option::is_none")] pub locale: Option, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsToolName { Sports, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsFunction { Schedule, Standings, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsLeague { Nba, @@ -160,12 +198,13 @@ pub enum SportsLeague { Ipl, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct TimeOperation { + /// UTC offset formatted like "+03:00". pub utc_offset: String, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SearchResponseLength { Short, diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index f6e15c806..6af30b407 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -575,6 +575,9 @@ "sqlite": { "type": "boolean" }, + "standalone_web_search": { + "type": "boolean" + }, "steer": { "type": "boolean" }, @@ -4505,6 +4508,9 @@ "sqlite": { "type": "boolean" }, + "standalone_web_search": { + "type": "boolean" + }, "steer": { "type": "boolean" }, diff --git a/codex-rs/core/src/tools/spec_plan.rs b/codex-rs/core/src/tools/spec_plan.rs index c3656f243..2610bee95 100644 --- a/codex-rs/core/src/tools/spec_plan.rs +++ b/codex-rs/core/src/tools/spec_plan.rs @@ -238,11 +238,12 @@ fn spec_for_model_request( } } -pub(crate) fn hosted_model_tool_specs(turn_context: &TurnContext) -> Vec { +fn hosted_model_tool_specs(context: &CoreToolPlanContext<'_>) -> Vec { + let turn_context = context.turn_context; let mut specs = Vec::new(); let provider_capabilities = turn_context.provider.capabilities(); - let web_search_mode = provider_capabilities - .web_search + let web_search_mode = (!standalone_web_run_available(context.extension_tool_executors) + && provider_capabilities.web_search) .then_some(turn_context.config.web_search_mode.value()); let web_search_config = if provider_capabilities.web_search { turn_context.config.web_search_config.as_ref() @@ -504,11 +505,20 @@ fn add_tool_sources(context: &CoreToolPlanContext<'_>, planned_tools: &mut Plann add_mcp_runtime_tools(context, planned_tools); add_dynamic_tools(context, planned_tools); add_extension_tools(context, planned_tools); - for spec in hosted_model_tool_specs(context.turn_context) { + for spec in hosted_model_tool_specs(context) { planned_tools.add_hosted_spec(spec); } } +fn standalone_web_run_available( + extension_tools: &[Arc>], +) -> bool { + let web_run = ToolName::namespaced("web", "run"); + extension_tools + .iter() + .any(|executor| executor.tool_name() == web_run) +} + fn add_shell_tools(context: &CoreToolPlanContext<'_>, planned_tools: &mut PlannedTools) { let turn_context = context.turn_context; let features = turn_context.features.get(); diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 59d695489..2eb6eea5d 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -20,8 +20,11 @@ use codex_tools::DiscoverablePluginInfo; use codex_tools::DiscoverableTool; use codex_tools::ResponsesApiNamespaceTool; use codex_tools::ResponsesApiTool; +use codex_tools::ToolCall as ExtensionToolCall; +use codex_tools::ToolExecutor; use codex_tools::ToolExposure; use codex_tools::ToolName; +use codex_tools::ToolOutput; use codex_tools::ToolSpec; use pretty_assertions::assert_eq; use serde_json::json; @@ -37,6 +40,7 @@ struct ToolPlanInputs { mcp_tools: Option>, deferred_mcp_tools: Option>, discoverable_tools: Option>, + extension_tool_executors: Vec>>, dynamic_tools: Vec, } @@ -176,7 +180,7 @@ async fn probe_with( mcp_tools: inputs.mcp_tools, deferred_mcp_tools: inputs.deferred_mcp_tools, discoverable_tools: inputs.discoverable_tools, - extension_tool_executors: Vec::new(), + extension_tool_executors: inputs.extension_tool_executors, dynamic_tools: inputs.dynamic_tools.as_slice(), }, ); @@ -253,6 +257,37 @@ fn use_bedrock_provider(turn: &mut TurnContext) { turn.provider = create_model_provider(provider_info, turn.auth_manager.clone()); } +struct WebRunExtensionTool; + +#[async_trait::async_trait] +impl ToolExecutor for WebRunExtensionTool { + fn tool_name(&self) -> ToolName { + ToolName::namespaced("web", "run") + } + + fn spec(&self) -> ToolSpec { + ToolSpec::Namespace(codex_tools::ResponsesApiNamespace { + name: "web".to_string(), + description: "Test web namespace.".to_string(), + tools: vec![ResponsesApiNamespaceTool::Function(ResponsesApiTool { + name: "run".to_string(), + description: "Test standalone web search tool.".to_string(), + strict: false, + defer_loading: None, + parameters: codex_tools::JsonSchema::default(), + output_schema: None, + })], + }) + } + + async fn handle( + &self, + _call: ExtensionToolCall, + ) -> Result, codex_tools::FunctionCallError> { + Ok(Box::new(codex_tools::JsonToolOutput::new(json!({})))) + } +} + fn duplicate_primary_environment(turn: &mut TurnContext) { let mut second_environment = turn.environments.turn_environments[0].clone(); second_environment.environment_id = "secondary".to_string(); @@ -947,6 +982,26 @@ async fn hosted_tools_follow_provider_auth_model_and_config_gates() { } ); + let standalone_web_search_without_web_run = probe(|turn| { + set_feature(turn, Feature::StandaloneWebSearch, /*enabled*/ true); + set_web_search_mode(turn, WebSearchMode::Live); + }) + .await; + standalone_web_search_without_web_run.assert_visible_contains(&["web_search"]); + + let standalone_web_search = probe_with( + |turn| { + set_feature(turn, Feature::StandaloneWebSearch, /*enabled*/ true); + set_web_search_mode(turn, WebSearchMode::Live); + }, + ToolPlanInputs { + extension_tool_executors: vec![Arc::new(WebRunExtensionTool)], + ..Default::default() + }, + ) + .await; + standalone_web_search.assert_visible_lacks(&["web_search"]); + let unsupported_provider = probe(|turn| { set_web_search_mode(turn, WebSearchMode::Live); use_bedrock_provider(turn); diff --git a/codex-rs/ext/web-search/BUILD.bazel b/codex-rs/ext/web-search/BUILD.bazel new file mode 100644 index 000000000..e8c26644f --- /dev/null +++ b/codex-rs/ext/web-search/BUILD.bazel @@ -0,0 +1,9 @@ +load("//:defs.bzl", "codex_rust_crate") + +codex_rust_crate( + name = "web-search", + crate_name = "codex_web_search_extension", + compile_data = [ + "web_run_description.md", + ], +) diff --git a/codex-rs/ext/web-search/Cargo.toml b/codex-rs/ext/web-search/Cargo.toml new file mode 100644 index 000000000..978aa4c45 --- /dev/null +++ b/codex-rs/ext/web-search/Cargo.toml @@ -0,0 +1,31 @@ +[package] +edition.workspace = true +license.workspace = true +name = "codex-web-search-extension" +version.workspace = true + +[lib] +name = "codex_web_search_extension" +path = "src/lib.rs" +doctest = false + +[lints] +workspace = true + +[dependencies] +async-trait = { workspace = true } +codex-api = { workspace = true } +codex-core = { workspace = true } +codex-extension-api = { workspace = true } +codex-features = { workspace = true } +codex-login = { workspace = true } +codex-model-provider = { workspace = true } +codex-model-provider-info = { workspace = true } +codex-protocol = { workspace = true } +codex-tools = { workspace = true } +http = { workspace = true } +schemars = { workspace = true } +serde_json = { workspace = true } + +[dev-dependencies] +pretty_assertions = { workspace = true } diff --git a/codex-rs/ext/web-search/src/extension.rs b/codex-rs/ext/web-search/src/extension.rs new file mode 100644 index 000000000..228d77897 --- /dev/null +++ b/codex-rs/ext/web-search/src/extension.rs @@ -0,0 +1,173 @@ +use std::sync::Arc; + +use codex_api::ApproximateLocation; +use codex_api::LocationType; +use codex_api::SearchContextSize; +use codex_api::SearchFilters; +use codex_api::SearchSettings; +use codex_core::config::Config; +use codex_extension_api::ConfigContributor; +use codex_extension_api::ExtensionData; +use codex_extension_api::ExtensionRegistryBuilder; +use codex_extension_api::ThreadLifecycleContributor; +use codex_extension_api::ThreadStartInput; +use codex_extension_api::ToolContributor; +use codex_features::Feature; +use codex_login::AuthManager; +use codex_model_provider::create_model_provider; +use codex_model_provider_info::ModelProviderInfo; +use codex_protocol::config_types::WebSearchContextSize; +use codex_protocol::config_types::WebSearchMode; + +use crate::tool::WebSearchTool; + +#[derive(Clone)] +struct WebSearchExtension { + auth_manager: Arc, +} + +#[derive(Clone)] +struct WebSearchExtensionConfig { + enabled: bool, + provider: ModelProviderInfo, + settings: SearchSettings, +} + +impl From<&Config> for WebSearchExtensionConfig { + fn from(config: &Config) -> Self { + let web_search_mode = config.web_search_mode.value(); + Self { + enabled: config.features.enabled(Feature::StandaloneWebSearch) + && config.model_provider.is_openai() + && web_search_mode != WebSearchMode::Disabled, + provider: config.model_provider.clone(), + settings: search_settings(config, web_search_mode), + } + } +} + +fn search_settings(config: &Config, web_search_mode: WebSearchMode) -> SearchSettings { + let web_search_config = config.web_search_config.as_ref(); + SearchSettings { + user_location: web_search_config + .and_then(|config| config.user_location.as_ref()) + .map(|location| ApproximateLocation { + r#type: LocationType::Approximate, + country: location.country.clone(), + region: location.region.clone(), + city: location.city.clone(), + timezone: location.timezone.clone(), + }), + search_context_size: web_search_config + .and_then(|config| config.search_context_size) + .map(|size| match size { + WebSearchContextSize::Low => SearchContextSize::Low, + WebSearchContextSize::Medium => SearchContextSize::Medium, + WebSearchContextSize::High => SearchContextSize::High, + }), + filters: web_search_config + .and_then(|config| config.filters.as_ref()) + .map(|filters| SearchFilters { + allowed_domains: filters.allowed_domains.clone(), + blocked_domains: None, + }), + external_web_access: Some(match web_search_mode { + WebSearchMode::Live => true, + WebSearchMode::Cached | WebSearchMode::Disabled => false, + }), + ..Default::default() + } +} + +#[async_trait::async_trait] +impl ThreadLifecycleContributor for WebSearchExtension { + async fn on_thread_start(&self, input: ThreadStartInput<'_, Config>) { + input + .thread_store + .insert(WebSearchExtensionConfig::from(input.config)); + } +} + +impl ConfigContributor for WebSearchExtension { + fn on_config_changed( + &self, + _session_store: &ExtensionData, + thread_store: &ExtensionData, + _previous_config: &Config, + new_config: &Config, + ) { + thread_store.insert(WebSearchExtensionConfig::from(new_config)); + } +} + +impl ToolContributor for WebSearchExtension { + fn tools( + &self, + session_store: &ExtensionData, + thread_store: &ExtensionData, + ) -> Vec>> { + let Some(config) = thread_store.get::() else { + return Vec::new(); + }; + if !config.enabled { + return Vec::new(); + } + + vec![Arc::new(WebSearchTool { + session_id: session_store.level_id().to_string(), + provider: create_model_provider( + config.provider.clone(), + Some(self.auth_manager.clone()), + ), + settings: config.settings.clone(), + })] + } +} + +pub fn install(registry: &mut ExtensionRegistryBuilder, auth_manager: Arc) { + let extension = Arc::new(WebSearchExtension { auth_manager }); + registry.thread_lifecycle_contributor(extension.clone()); + registry.config_contributor(extension.clone()); + registry.tool_contributor(extension); +} + +#[cfg(test)] +mod tests { + use codex_extension_api::ExtensionData; + use codex_extension_api::ExtensionRegistryBuilder; + use codex_extension_api::ToolName; + use codex_login::CodexAuth; + use codex_model_provider_info::ModelProviderInfo; + use pretty_assertions::assert_eq; + + use super::AuthManager; + use super::Config; + use super::WebSearchExtensionConfig; + use super::install; + + #[test] + fn installed_extension_contributes_web_run_when_enabled() { + let mut builder = ExtensionRegistryBuilder::::new(); + install( + &mut builder, + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy")), + ); + let registry = builder.build(); + let session_store = ExtensionData::new("session"); + let thread_store = ExtensionData::new("11111111-1111-4111-8111-111111111111"); + thread_store.insert(WebSearchExtensionConfig { + enabled: true, + provider: ModelProviderInfo::create_openai_provider(/*base_url*/ None), + settings: Default::default(), + }); + + let tool_names = registry + .tool_contributors() + .iter() + .flat_map(|contributor| contributor.tools(&session_store, &thread_store)) + .map(|tool| tool.tool_name()) + .collect::>(); + + assert_eq!(tool_names, vec![ToolName::namespaced("web", "run")]); + } +} diff --git a/codex-rs/ext/web-search/src/history.rs b/codex-rs/ext/web-search/src/history.rs new file mode 100644 index 000000000..b7a26d32c --- /dev/null +++ b/codex-rs/ext/web-search/src/history.rs @@ -0,0 +1,170 @@ +use codex_api::SearchInput; +use codex_core::parse_turn_item; +use codex_protocol::items::TurnItem; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_tools::retain_tail_from_last_n_user_messages; +use codex_tools::truncate_assistant_output_text_to_token_budget; + +const ASSISTANT_CONTEXT_TOKEN_LIMIT: usize = 1_000; +const ASSISTANT_ROLE: &str = "assistant"; +const USER_ROLE: &str = "user"; + +/// Builds the conversation tail for standalone web search. +/// +/// The tail keeps the previous user text message, up to 1k tokens of assistant +/// text that followed it, and the current user text message. +pub(crate) fn recent_input(items: &[ResponseItem]) -> Option { + let mut messages = Vec::new(); + for item in items { + push_visible_message(&mut messages, item); + } + + retain_tail_from_last_n_user_messages(&mut messages, /*user_message_count*/ 2); + truncate_assistant_output_text_to_token_budget(&mut messages, ASSISTANT_CONTEXT_TOKEN_LIMIT); + (!messages.is_empty()).then_some(SearchInput::Items(messages)) +} + +fn push_visible_message(messages: &mut Vec, item: &ResponseItem) { + match item { + ResponseItem::Message { role, .. } if role == ASSISTANT_ROLE => { + messages.push(item.clone()); + } + ResponseItem::Message { + id, + role, + content, + phase, + } if role == USER_ROLE + && matches!(parse_turn_item(item), Some(TurnItem::UserMessage(_))) => + { + let content = content + .iter() + .filter(|item| matches!(item, ContentItem::InputText { .. })) + .cloned() + .collect::>(); + if !content.is_empty() { + messages.push(ResponseItem::Message { + id: id.clone(), + role: role.clone(), + content, + phase: phase.clone(), + }); + } + } + _ => {} + } +} + +#[cfg(test)] +mod tests { + use codex_api::SearchInput; + use codex_protocol::models::ContentItem; + use codex_protocol::models::ResponseItem; + use pretty_assertions::assert_eq; + + use super::ASSISTANT_ROLE; + use super::USER_ROLE; + use super::recent_input; + + fn message(role: &str, text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: role.to_string(), + content: vec![if role == ASSISTANT_ROLE { + ContentItem::OutputText { + text: text.to_string(), + } + } else { + ContentItem::InputText { + text: text.to_string(), + } + }], + phase: None, + } + } + + #[test] + fn keeps_current_user_and_previous_visible_turn() { + let items = vec![ + message("system", "system"), + message(USER_ROLE, "old user"), + message(ASSISTANT_ROLE, "old assistant"), + message(USER_ROLE, "previous user"), + ResponseItem::FunctionCall { + id: None, + name: "tool".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: "call-1".to_string(), + }, + message(ASSISTANT_ROLE, "previous assistant"), + message("developer", "developer"), + message(USER_ROLE, "current user"), + message(ASSISTANT_ROLE, "current commentary"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message(USER_ROLE, "previous user"), + message(ASSISTANT_ROLE, "previous assistant"), + message(USER_ROLE, "current user"), + ])) + ); + } + + #[test] + fn keeps_only_text_from_recent_user_messages() { + let previous_user = ResponseItem::Message { + id: None, + role: USER_ROLE.to_string(), + content: vec![ + ContentItem::InputText { + text: "previous user".to_string(), + }, + ContentItem::InputImage { + image_url: "data:image/png;base64,image".to_string(), + detail: None, + }, + ], + phase: None, + }; + let items = vec![ + previous_user, + message(ASSISTANT_ROLE, "previous assistant"), + message(USER_ROLE, "current user"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message(USER_ROLE, "previous user"), + message(ASSISTANT_ROLE, "previous assistant"), + message(USER_ROLE, "current user"), + ])) + ); + } + + #[test] + fn ignores_contextual_user_messages_when_selecting_recent_turns() { + let items = vec![ + message(USER_ROLE, "previous user"), + message(ASSISTANT_ROLE, "previous assistant"), + message( + USER_ROLE, + "\n/tmp\n", + ), + message(USER_ROLE, "current user"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message(USER_ROLE, "previous user"), + message(ASSISTANT_ROLE, "previous assistant"), + message(USER_ROLE, "current user"), + ])) + ); + } +} diff --git a/codex-rs/ext/web-search/src/lib.rs b/codex-rs/ext/web-search/src/lib.rs new file mode 100644 index 000000000..1b18039e4 --- /dev/null +++ b/codex-rs/ext/web-search/src/lib.rs @@ -0,0 +1,7 @@ +mod extension; +mod history; +mod output; +mod schema; +mod tool; + +pub use extension::install; diff --git a/codex-rs/ext/web-search/src/output.rs b/codex-rs/ext/web-search/src/output.rs new file mode 100644 index 000000000..124271c21 --- /dev/null +++ b/codex-rs/ext/web-search/src/output.rs @@ -0,0 +1,72 @@ +use codex_extension_api::ToolOutput; +use codex_extension_api::ToolPayload; +use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputPayload; +use codex_protocol::models::ResponseInputItem; + +pub(crate) struct EncryptedSearchOutput { + encrypted_output: String, +} + +impl EncryptedSearchOutput { + pub(crate) fn new(encrypted_output: String) -> Self { + Self { encrypted_output } + } +} + +impl ToolOutput for EncryptedSearchOutput { + fn log_preview(&self) -> String { + "[encrypted standalone web search output]".to_string() + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem { + // TODO: Make standalone search honor memories.disable_on_external_context, + // as hosted web search does. + ResponseInputItem::FunctionCallOutput { + call_id: call_id.to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::EncryptedContent { + encrypted_content: self.encrypted_output.clone(), + }, + ]), + } + } +} + +#[cfg(test)] +mod tests { + use codex_extension_api::ToolPayload; + use codex_protocol::models::FunctionCallOutputContentItem; + use codex_protocol::models::FunctionCallOutputPayload; + use codex_protocol::models::ResponseInputItem; + use pretty_assertions::assert_eq; + + use super::EncryptedSearchOutput; + use super::ToolOutput; + + #[test] + fn emits_encrypted_function_call_output() { + let output = EncryptedSearchOutput::new("encrypted-search-output".to_string()); + + assert_eq!( + output.to_response_item( + "call-1", + &ToolPayload::Function { + arguments: "{}".to_string(), + }, + ), + ResponseInputItem::FunctionCallOutput { + call_id: "call-1".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::EncryptedContent { + encrypted_content: "encrypted-search-output".to_string(), + }, + ]), + } + ); + } +} diff --git a/codex-rs/ext/web-search/src/schema.rs b/codex-rs/ext/web-search/src/schema.rs new file mode 100644 index 000000000..2f71f1595 --- /dev/null +++ b/codex-rs/ext/web-search/src/schema.rs @@ -0,0 +1,36 @@ +use codex_api::SearchCommands; +use schemars::r#gen::SchemaSettings; +use serde_json::Map; +use serde_json::Value; + +pub(crate) fn commands_schema() -> Value { + let schema = SchemaSettings::draft2019_09() + .with(|settings| { + settings.inline_subschemas = true; + settings.option_add_null_type = false; + }) + .into_generator() + .into_root_schema_for::(); + let schema = match serde_json::to_value(schema) { + Ok(schema) => schema, + Err(err) => panic!("search commands schema should serialize: {err}"), + }; + let Value::Object(mut schema) = schema else { + unreachable!("search commands schema must be an object"); + }; + + let mut tool_schema = Map::new(); + for key in [ + "properties", + "required", + "type", + "additionalProperties", + "$defs", + "definitions", + ] { + if let Some(value) = schema.remove(key) { + tool_schema.insert(key.to_string(), value); + } + } + Value::Object(tool_schema) +} diff --git a/codex-rs/ext/web-search/src/tool.rs b/codex-rs/ext/web-search/src/tool.rs new file mode 100644 index 000000000..1e1db2dec --- /dev/null +++ b/codex-rs/ext/web-search/src/tool.rs @@ -0,0 +1,113 @@ +use codex_api::ReqwestTransport; +use codex_api::SearchClient; +use codex_api::SearchCommands; +use codex_api::SearchRequest; +use codex_api::SearchSettings; +use codex_extension_api::FunctionCallError; +use codex_extension_api::ResponsesApiTool; +use codex_extension_api::ToolCall; +use codex_extension_api::ToolExecutor; +use codex_extension_api::ToolName; +use codex_extension_api::ToolOutput; +use codex_extension_api::ToolSpec; +use codex_extension_api::parse_tool_input_schema; +use codex_login::default_client::build_reqwest_client; +use codex_model_provider::SharedModelProvider; +use codex_tools::ResponsesApiNamespace; +use codex_tools::ResponsesApiNamespaceTool; +use codex_tools::ToolExposure; +use codex_tools::default_namespace_description; +use http::HeaderMap; + +use crate::history::recent_input; +use crate::output::EncryptedSearchOutput; +use crate::schema::commands_schema; + +const WEB_NAMESPACE: &str = "web"; +const RUN_TOOL_NAME: &str = "run"; +const WEB_RUN_DESCRIPTION: &str = include_str!("../web_run_description.md"); + +pub(crate) struct WebSearchTool { + pub(crate) session_id: String, + pub(crate) provider: SharedModelProvider, + pub(crate) settings: SearchSettings, +} + +#[async_trait::async_trait] +impl ToolExecutor for WebSearchTool { + fn tool_name(&self) -> ToolName { + ToolName::namespaced(WEB_NAMESPACE, RUN_TOOL_NAME) + } + + fn spec(&self) -> ToolSpec { + let parameters = match parse_tool_input_schema(&commands_schema()) { + Ok(parameters) => parameters, + Err(err) => panic!("search command schema should parse: {err}"), + }; + + ToolSpec::Namespace(ResponsesApiNamespace { + name: WEB_NAMESPACE.to_string(), + description: default_namespace_description(WEB_NAMESPACE), + tools: vec![ResponsesApiNamespaceTool::Function(ResponsesApiTool { + name: RUN_TOOL_NAME.to_string(), + description: WEB_RUN_DESCRIPTION.to_string(), + strict: false, + parameters, + output_schema: None, + defer_loading: None, + })], + }) + } + + fn exposure(&self) -> ToolExposure { + ToolExposure::DirectModelOnly + } + + async fn handle(&self, call: ToolCall) -> Result, FunctionCallError> { + let commands = parse_commands(&call)?; + let provider = self + .provider + .api_provider() + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + let auth = self + .provider + .api_auth() + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + let client = SearchClient::new( + ReqwestTransport::new(build_reqwest_client()), + provider, + auth, + ); + let request = SearchRequest { + id: self.session_id.clone(), + model: None, + reasoning: None, + input: recent_input(call.conversation_history.items()), + commands: Some(commands), + settings: Some(self.settings.clone()), + max_output_tokens: Some( + u64::try_from(call.truncation_policy.token_budget()).unwrap_or(u64::MAX), + ), + }; + let response = client + .search(&request, HeaderMap::new()) + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + + Ok(Box::new(EncryptedSearchOutput::new( + response.encrypted_output, + ))) + } +} + +fn parse_commands(call: &ToolCall) -> Result { + let arguments = call.function_arguments()?; + if arguments.trim().is_empty() { + return Ok(SearchCommands::default()); + } + + serde_json::from_str(arguments) + .map_err(|err| FunctionCallError::RespondToModel(err.to_string())) +} diff --git a/codex-rs/ext/web-search/web_run_description.md b/codex-rs/ext/web-search/web_run_description.md new file mode 100644 index 000000000..bccc3d81f --- /dev/null +++ b/codex-rs/ext/web-search/web_run_description.md @@ -0,0 +1,80 @@ +Tool for accessing the internet. + + +--- + +## Examples of different commands available in this tool + +Examples of different commands available in this tool: +* `search_query`: {"search_query": [{"q": "What is the capital of France?"}, {"q": "What is the capital of belgium?"}]}. Searches the internet for a given query (and optionally with a domain or recency filter) +* `image_query`: {"image_query":[{"q": "waterfalls"}]}. +* `open`: {"open": [{"ref_id": "turn0search0"}, {"ref_id": "https://www.openai.com", "lineno": 120}]} +* `click`: {"click": [{"ref_id": "turn0fetch3", "id": 17}]} +* `find`: {"find": [{"ref_id": "turn0fetch3", "pattern": "Annie Case"}]} +* `screenshot`: {"screenshot": [{"ref_id": "turn1view0", "pageno": 0}, {"ref_id": "turn1view0", "pageno": 3}]} +* `finance`: {"finance":[{"ticker":"AMD","type":"equity","market":"USA"}]}, {"finance":[{"ticker":"BTC","type":"crypto","market":""}]} +* `weather`: {"weather":[{"location":"San Francisco, CA"}]} +* `sports`: {"sports":[{"fn":"standings","league":"nfl"}, {"fn":"schedule","league":"nba","team":"GSW","date_from":"2025-02-24"}]} +* `time`: {"time":[{"utc_offset":"+03:00"}]} + +--- + +## Usage hints +To use this tool efficiently: +* Use multiple commands and queries in one call to get more results faster; e.g. {"search_query": [{"q": "bitcoin news"}], "finance":[{"ticker":"BTC","type":"crypto","market":""}], "find": [{"ref_id": "turn0search0", "pattern": "Annie Case"}, {"ref_id": "turn0search1", "pattern": "John Smith"}]} +* Use "response_length" to control the number of results returned by this tool, omit it if you intend to pass "short" in +* Only write required parameters; do not write empty lists or nulls where they could be omitted. +* `search_query` must have length at most 4 in each call. If it has length > 3, response_length must be medium or long +* If you find yourself in a situation where you accidentally call the `web.run` tool, it's best just to send an empty query: {"search_query": [{"q": ""}]}. + +--- + +## Decision boundary + +If the user makes an explicit request to search the internet, find latest information, look up, etc (or to not do so), you must obey their request. +When you make an assumption, always consider whether it is temporally stable; i.e. whether there's even a small (>10%) chance it has changed. If it is unstable, you must verify with browsing the internet for verification. + + +Below is a list of scenarios where browsing the internet MUST be used. PAY CLOSE ATTENTION: you MUST browse the internet in these cases. If you're unsure or on the fence, you MUST bias towards browsing the internet. +- The information could have changed recently: for example news; prices; laws; schedules; product specs; sports scores; economic indicators; political/public/company figures (e.g. the question relates to 'the president of country A' or 'the CEO of company B', which might change over time); rules; regulations; standards; software libraries that could be updated; exchange rates; recommendations (i.e., recommendations about various topics or things might be informed by what currently exists / is popular / is safe / is unsafe / is in the zeitgeist / etc.); and many many many more categories -- again, if you're on the fence, you MUST browse the internet! + - For news queries, prioritize more recent events, ensuring you compare publish dates and the date that the event happened. +- The user is seeking recommendations that could lead them to spend substantial time or money -- researching products, restaurants, travel plans, etc. +- The user wants (or would benefit from) direct quotes, links, or precise source attribution. +- A specific page, paper, dataset, PDF, or site is referenced and you haven't been given its contents. +- You're unsure about a fact, the topic is niche or emerging, or you suspect there's at least a 10% chance you will incorrectly recall it +- High-stakes accuracy matters (medical, legal, financial guidance). For these you generally should search by default because this information is highly temporally unstable +- The user explicitly says to search, browse, verify, or look it up. + + +--- + +## Special cases +If these conflict with any other instructions, these should take precedence. + + +- When the user asks for information about how to use OpenAI products, (ChatGPT, the OpenAI API, etc.), you should check the code in local env and only browse as fallback, when you browse restrict your sources to official OpenAI websites using the domains filter, unless otherwise requested. +- When using search to answer technical questions, you must only rely on primary sources (research papers, official documentation, etc.) +- Clearly indicate when you are making an inference from sources. + + +--- + +## Word limits +Responses may not excessively quote or draw on a specific source. There are several limits here: +- **Limit on verbatim quotes:** + - You may not quote more than 25 words verbatim from any single non-lyrical source, unless the source is reddit. + - For song lyrics, verbatim quotes must be limited to at most 10 words. + - Long quotes from reddit are allowed, as long as you indicate that those are direct quotes via a markdown blockquote starting with ">", copy verbatim, and link the source. +- **Word limits:** + - Each webpage source in the sources has a word limit label formatted like "[wordlim N]", in which N is the maximum number of words in the whole response that are attributed to that source. If omitted, the word limit is 200 words. + - Non-contiguous words derived from a given source must be counted to the word limit. + - The summarization limit N is a maximum for each source. + - When using multiple sources, their summarization limits add together. However, each article used must be relevant to the response. +- **Copyright compliance:** + - You must avoid providing full articles, long verbatim passages, or extensive direct quotes due to copyright concerns. + - If the user asked for a verbatim quote, the response should provide a short compliant excerpt and then answer with paraphrases and summaries. + - Again, this limit does not apply to reddit content, as long as it's appropriately indicated that those are direct quotes and you link to the source. + +--- + +Make sure to provide links to the sources you used in your response. diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index cd50de433..391e33e77 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -103,6 +103,8 @@ pub enum Feature { /// Allow the model to request web searches that fetch cached content. /// Takes precedence over `WebSearchRequest`. WebSearchCached, + /// Expose the extension-backed standalone web search tool. + StandaloneWebSearch, /// Use the legacy Landlock Linux sandbox fallback instead of the default /// bubblewrap pipeline. UseLegacyLandlock, @@ -787,6 +789,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Deprecated, default_enabled: false, }, + FeatureSpec { + id: Feature::StandaloneWebSearch, + key: "standalone_web_search", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::SearchTool, key: "search_tool", diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 28a7d6522..6112ea8a6 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -902,6 +902,13 @@ pub enum ResponseItem { Other, } +impl ResponseItem { + /// Returns whether this item is an ordinary user-role message. + pub fn is_user_message(&self) -> bool { + matches!(self, Self::Message { role, .. } if role == "user") + } +} + pub const BASE_INSTRUCTIONS_DEFAULT: &str = include_str!("prompts/base_instructions/default.md"); /// Base instructions for the model in a thread. Corresponds to the `instructions` field in the ResponsesAPI. diff --git a/codex-rs/tools/src/lib.rs b/codex-rs/tools/src/lib.rs index c141bfb37..5e8c6fc1b 100644 --- a/codex-rs/tools/src/lib.rs +++ b/codex-rs/tools/src/lib.rs @@ -8,6 +8,7 @@ mod image_detail; mod json_schema; mod mcp_tool; mod request_plugin_install; +mod response_history; mod responses_api; mod tool_call; mod tool_config; @@ -45,6 +46,8 @@ pub use request_plugin_install::RequestPluginInstallResult; pub use request_plugin_install::all_requested_connectors_picked_up; pub use request_plugin_install::build_request_plugin_install_elicitation_request; pub use request_plugin_install::verified_connector_install_completed; +pub use response_history::retain_tail_from_last_n_user_messages; +pub use response_history::truncate_assistant_output_text_to_token_budget; pub use responses_api::FreeformTool; pub use responses_api::FreeformToolFormat; pub use responses_api::LoadableToolSpec; diff --git a/codex-rs/tools/src/response_history.rs b/codex-rs/tools/src/response_history.rs new file mode 100644 index 000000000..cee6f438c --- /dev/null +++ b/codex-rs/tools/src/response_history.rs @@ -0,0 +1,149 @@ +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_utils_output_truncation::TruncationPolicy; +use codex_utils_output_truncation::approx_token_count; +use codex_utils_output_truncation::truncate_text; + +/// Retains items from the earliest of the last `user_message_count` user +/// messages through the latest user message. +pub fn retain_tail_from_last_n_user_messages( + items: &mut Vec, + user_message_count: usize, +) { + if user_message_count == 0 { + items.clear(); + return; + } + + let Some(latest_user_idx) = items.iter().rposition(ResponseItem::is_user_message) else { + items.clear(); + return; + }; + items.truncate(latest_user_idx + 1); + + let earliest_retained_user_idx = items + .iter() + .enumerate() + .rev() + .filter(|(_, item)| item.is_user_message()) + .take(user_message_count) + .last() + .map(|(idx, _)| idx) + .unwrap_or(latest_user_idx); + items.drain(..earliest_retained_user_idx); +} + +/// Truncates assistant output text to a shared token budget across items. +pub fn truncate_assistant_output_text_to_token_budget( + items: &mut Vec, + max_tokens: usize, +) { + let mut remaining_budget = max_tokens; + + items.retain_mut(|item| { + let ResponseItem::Message { role, content, .. } = item else { + return true; + }; + if role != "assistant" { + return true; + } + + content.retain_mut(|content_item| { + let ContentItem::OutputText { text } = content_item else { + return true; + }; + if remaining_budget == 0 { + return false; + } + + let token_count = approx_token_count(text); + if token_count <= remaining_budget { + remaining_budget = remaining_budget.saturating_sub(token_count); + return true; + } + + *text = truncate_text(text, TruncationPolicy::Tokens(remaining_budget)); + remaining_budget = 0; + true + }); + !content.is_empty() + }); +} + +#[cfg(test)] +mod tests { + use codex_protocol::models::ContentItem; + use codex_protocol::models::ResponseItem; + use codex_utils_output_truncation::TruncationPolicy; + use codex_utils_output_truncation::truncate_text; + use pretty_assertions::assert_eq; + + use super::retain_tail_from_last_n_user_messages; + use super::truncate_assistant_output_text_to_token_budget; + + fn message(role: &str, text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: role.to_string(), + content: vec![if role == "assistant" { + ContentItem::OutputText { + text: text.to_string(), + } + } else { + ContentItem::InputText { + text: text.to_string(), + } + }], + phase: None, + } + } + + #[test] + fn retains_tail_through_latest_user_message() { + let mut items = vec![ + message("system", "system"), + message("user", "old user"), + message("assistant", "old assistant"), + message("user", "previous user"), + message("assistant", "previous assistant"), + message("user", "current user"), + message("assistant", "later assistant"), + ]; + + retain_tail_from_last_n_user_messages(&mut items, /*user_message_count*/ 2); + + assert_eq!( + items, + vec![ + message("user", "previous user"), + message("assistant", "previous assistant"), + message("user", "current user"), + ] + ); + } + + #[test] + fn truncates_assistant_output_text_across_items() { + let long_assistant = "a".repeat(16); + let mut items = vec![ + message("user", "previous user"), + message("assistant", &long_assistant), + message("assistant", "after budget"), + message("user", "current user"), + ]; + + truncate_assistant_output_text_to_token_budget(&mut items, /*max_tokens*/ 2); + + assert_eq!( + items, + vec![ + message("user", "previous user"), + message( + "assistant", + &truncate_text(&long_assistant, TruncationPolicy::Tokens(2)), + ), + message("user", "current user"), + ] + ); + } +}