[codex] trace MCP startup latency (#28630)

## Summary

- add trace-level instrumentation around per-server MCP setup, client
construction, initialization, and initial tool listing
- trace Codex Apps tool and server-info cache loads
- attach `server_name` to server-scoped spans so slow startup work can
be attributed to a specific MCP server

## Why

`session_init.mcp_manager_init` can occasionally be slow, but its
existing coarse span does not identify whether time is spent loading the
Codex Apps cache, constructing a client, initializing a transport, or
listing tools. These definition-level spans provide that breakdown
without changing startup behavior.

## Validation

- `just test -p codex-mcp` (87 passed)
- `just test -p codex-rmcp-client` (86 passed, 2 skipped)
This commit is contained in:
rphilizaire-openai
2026-06-23 17:46:54 -07:00
committed by GitHub
Unverified
parent 61ff4d087e
commit 322e33512b
3 changed files with 13 additions and 1 deletions
+3
View File
@@ -19,6 +19,7 @@ use serde::Deserialize;
use serde::Serialize;
use sha1::Digest;
use sha1::Sha1;
use tracing::instrument;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodexAppsToolsCacheKey {
@@ -170,6 +171,7 @@ pub(crate) fn read_cached_codex_apps_tools(
}
}
#[instrument(level = "trace", skip_all)]
pub(crate) fn load_cached_codex_apps_tools(
cache_context: &CodexAppsToolsCacheContext,
) -> CachedCodexAppsToolsLoad {
@@ -210,6 +212,7 @@ pub(crate) fn write_cached_codex_apps_tools(
let _ = std::fs::write(cache_path, bytes);
}
#[instrument(level = "trace", skip_all)]
pub(crate) fn load_cached_codex_apps_server_info(
cache_context: &CodexAppsToolsCacheContext,
) -> Option<McpServerInfo> {
+7 -1
View File
@@ -67,6 +67,8 @@ use rmcp::model::JsonObject;
use rmcp::model::ProtocolVersion;
use rmcp::model::Tool as RmcpTool;
use tokio_util::sync::CancellationToken;
use tracing::Instrument;
use tracing::instrument;
use tracing::warn;
/// MCP server capability indicating that Codex should include [`SandboxState`]
@@ -141,6 +143,7 @@ pub(crate) struct AsyncManagedClient {
impl AsyncManagedClient {
// Keep this constructor flat so the startup inputs remain readable at the
// single call site instead of introducing a one-off params wrapper.
#[instrument(level = "trace", skip_all, fields(server_name = %server_name))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
server_name: String,
@@ -228,7 +231,7 @@ impl AsyncManagedClient {
startup_complete_for_fut.store(true, Ordering::Release);
outcome
};
let client = fut.boxed().shared();
let client = fut.in_current_span().boxed().shared();
if cached_tool_info_snapshot.is_some() {
let startup_task = client.clone();
tokio::spawn(async move {
@@ -306,6 +309,7 @@ impl From<anyhow::Error> for StartupOutcomeError {
}
}
#[instrument(level = "trace", skip_all, fields(server_name = %server_name))]
pub(crate) async fn list_tools_for_client_uncached(
server_name: &str,
is_codex_apps_mcp_server: bool,
@@ -529,6 +533,7 @@ fn validate_mcp_server_name(server_name: &str) -> Result<()> {
Ok(())
}
#[instrument(level = "trace", skip_all, fields(server_name = %server_name))]
async fn start_server_task(
server_name: String,
client: Arc<RmcpClient>,
@@ -658,6 +663,7 @@ struct StartServerTaskParams {
supports_openai_form_elicitation: bool,
}
#[instrument(level = "trace", skip_all, fields(server_name = %server_name))]
async fn make_rmcp_client(
server_name: &str,
server: EffectiveMcpServer,
+3
View File
@@ -58,6 +58,7 @@ use tokio::sync::Mutex;
use tokio::sync::Semaphore;
use tokio::sync::watch;
use tokio::time;
use tracing::instrument;
use tracing::warn;
use crate::elicitation_client_service::ElicitationClientService;
@@ -420,6 +421,7 @@ impl RmcpClient {
/// Perform the initialization handshake with the MCP server.
/// https://modelcontextprotocol.io/specification/2025-06-18/basic/lifecycle#initialization
#[instrument(level = "trace", skip_all)]
pub async fn initialize(
&self,
params: InitializeRequestParams,
@@ -501,6 +503,7 @@ impl RmcpClient {
Ok(result)
}
#[instrument(level = "trace", skip_all)]
pub async fn list_tools_with_connector_ids(
&self,
params: Option<PaginatedRequestParams>,