diff --git a/codex-rs/exec-server-protocol/src/protocol.rs b/codex-rs/exec-server-protocol/src/protocol.rs index 5ae80c37e..133654dc0 100644 --- a/codex-rs/exec-server-protocol/src/protocol.rs +++ b/codex-rs/exec-server-protocol/src/protocol.rs @@ -2,6 +2,8 @@ use std::collections::HashMap; use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; use codex_file_system::FileSystemSandboxContext; +pub use codex_file_system::WalkOptions; +pub use codex_file_system::WalkOutcome; use codex_network_proxy::ManagedNetworkSandboxContext; use codex_protocol::config_types::ShellEnvironmentPolicyInherit; use codex_shell_command::shell_detect::DetectedShell; @@ -31,6 +33,7 @@ pub const FS_CREATE_DIRECTORY_METHOD: &str = "fs/createDirectory"; pub const FS_GET_METADATA_METHOD: &str = "fs/getMetadata"; pub const FS_CANONICALIZE_METHOD: &str = "fs/canonicalize"; pub const FS_READ_DIRECTORY_METHOD: &str = "fs/readDirectory"; +pub const FS_WALK_METHOD: &str = "fs/walk"; pub const FS_REMOVE_METHOD: &str = "fs/remove"; pub const FS_COPY_METHOD: &str = "fs/copy"; /// JSON-RPC request method for executor-side HTTP requests. 
@@ -370,6 +373,16 @@ pub struct FsReadDirectoryResponse { pub entries: Vec, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct FsWalkParams { + pub path: PathUri, + pub options: WalkOptions, + pub sandbox: Option<FileSystemSandboxContext>, +} + +pub type FsWalkResponse = WalkOutcome; + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct FsRemoveParams { diff --git a/codex-rs/exec-server/src/client.rs b/codex-rs/exec-server/src/client.rs index f913892e1..a00a2ce8b 100644 --- a/codex-rs/exec-server/src/client.rs +++ b/codex-rs/exec-server/src/client.rs @@ -58,6 +58,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD; use crate::protocol::FS_READ_DIRECTORY_METHOD; use crate::protocol::FS_READ_FILE_METHOD; use crate::protocol::FS_REMOVE_METHOD; +use crate::protocol::FS_WALK_METHOD; use crate::protocol::FS_WRITE_FILE_METHOD; use crate::protocol::FsCanonicalizeParams; use crate::protocol::FsCanonicalizeResponse; @@ -79,6 +80,8 @@ use crate::protocol::FsReadFileParams; use crate::protocol::FsReadFileResponse; use crate::protocol::FsRemoveParams; use crate::protocol::FsRemoveResponse; +use crate::protocol::FsWalkParams; +use crate::protocol::FsWalkResponse; use crate::protocol::FsWriteFileParams; use crate::protocol::FsWriteFileResponse; use crate::protocol::HTTP_REQUEST_BODY_DELTA_METHOD; @@ -620,6 +623,10 @@ impl ExecServerClient { self.call(FS_READ_DIRECTORY_METHOD, &params).await } + pub async fn fs_walk(&self, params: FsWalkParams) -> Result { + self.call(FS_WALK_METHOD, &params).await + } + pub async fn fs_remove( &self, params: FsRemoveParams, diff --git a/codex-rs/exec-server/src/fs_helper.rs b/codex-rs/exec-server/src/fs_helper.rs index 1eba3d0be..97c968275 100644 --- a/codex-rs/exec-server/src/fs_helper.rs +++ b/codex-rs/exec-server/src/fs_helper.rs @@ -17,6 +17,7 @@ use crate::protocol::FS_GET_METADATA_METHOD; use crate::protocol::FS_READ_DIRECTORY_METHOD; use 
crate::protocol::FS_READ_FILE_METHOD; use crate::protocol::FS_REMOVE_METHOD; +use crate::protocol::FS_WALK_METHOD; use crate::protocol::FS_WRITE_FILE_METHOD; use crate::protocol::FsCanonicalizeParams; use crate::protocol::FsCanonicalizeResponse; @@ -33,6 +34,8 @@ use crate::protocol::FsReadFileParams; use crate::protocol::FsReadFileResponse; use crate::protocol::FsRemoveParams; use crate::protocol::FsRemoveResponse; +use crate::protocol::FsWalkParams; +use crate::protocol::FsWalkResponse; use crate::protocol::FsWriteFileParams; use crate::protocol::FsWriteFileResponse; use crate::rpc::internal_error; @@ -56,6 +59,8 @@ pub(crate) enum FsHelperRequest { Canonicalize(FsCanonicalizeParams), #[serde(rename = "fs/readDirectory")] ReadDirectory(FsReadDirectoryParams), + #[serde(rename = "fs/walk")] + Walk(FsWalkParams), #[serde(rename = "fs/remove")] Remove(FsRemoveParams), #[serde(rename = "fs/copy")] @@ -84,6 +89,8 @@ pub(crate) enum FsHelperPayload { Canonicalize(FsCanonicalizeResponse), #[serde(rename = "fs/readDirectory")] ReadDirectory(FsReadDirectoryResponse), + #[serde(rename = "fs/walk")] + Walk(FsWalkResponse), #[serde(rename = "fs/remove")] Remove(FsRemoveResponse), #[serde(rename = "fs/copy")] @@ -99,6 +106,7 @@ impl FsHelperPayload { Self::GetMetadata(_) => FS_GET_METADATA_METHOD, Self::Canonicalize(_) => FS_CANONICALIZE_METHOD, Self::ReadDirectory(_) => FS_READ_DIRECTORY_METHOD, + Self::Walk(_) => FS_WALK_METHOD, Self::Remove(_) => FS_REMOVE_METHOD, Self::Copy(_) => FS_COPY_METHOD, } @@ -162,6 +170,13 @@ impl FsHelperPayload { } } + pub(crate) fn expect_walk(self) -> Result { + match self { + Self::Walk(response) => Ok(response), + other => Err(unexpected_response(FS_WALK_METHOD, other.operation())), + } + } + pub(crate) fn expect_remove(self) -> Result { match self { Self::Remove(response) => Ok(response), @@ -263,6 +278,13 @@ pub(crate) async fn run_direct_request( entries, })) } + FsHelperRequest::Walk(params) => { + let outcome = file_system + 
.walk(&params.path, params.options, /*sandbox*/ None) + .await + .map_err(map_fs_error)?; + Ok(FsHelperPayload::Walk(outcome)) + } FsHelperRequest::Remove(params) => { file_system .remove( diff --git a/codex-rs/exec-server/src/lib.rs b/codex-rs/exec-server/src/lib.rs index 9827e741d..69d4c07df 100644 --- a/codex-rs/exec-server/src/lib.rs +++ b/codex-rs/exec-server/src/lib.rs @@ -51,6 +51,11 @@ pub use codex_file_system::FileSystemResult; pub use codex_file_system::FileSystemSandboxContext; pub use codex_file_system::ReadDirectoryEntry; pub use codex_file_system::RemoveOptions; +pub use codex_file_system::WalkEntry; +pub use codex_file_system::WalkEntryKind; +pub use codex_file_system::WalkError; +pub use codex_file_system::WalkOptions; +pub use codex_file_system::WalkOutcome; pub use environment::CODEX_EXEC_SERVER_NOISE_AUTH_TOKEN_ENV_VAR; pub use environment::CODEX_EXEC_SERVER_NOISE_CHATGPT_ACCOUNT_ID_ENV_VAR; pub use environment::CODEX_EXEC_SERVER_NOISE_ENVIRONMENT_ID_ENV_VAR; @@ -113,6 +118,8 @@ pub use protocol::FsReadFileParams; pub use protocol::FsReadFileResponse; pub use protocol::FsRemoveParams; pub use protocol::FsRemoveResponse; +pub use protocol::FsWalkParams; +pub use protocol::FsWalkResponse; pub use protocol::FsWriteFileParams; pub use protocol::FsWriteFileResponse; pub use protocol::HttpHeader; diff --git a/codex-rs/exec-server/src/local_file_system.rs b/codex-rs/exec-server/src/local_file_system.rs index 3129606d6..281fd1eeb 100644 --- a/codex-rs/exec-server/src/local_file_system.rs +++ b/codex-rs/exec-server/src/local_file_system.rs @@ -22,6 +22,8 @@ use crate::FileSystemResult; use crate::FileSystemSandboxContext; use crate::ReadDirectoryEntry; use crate::RemoveOptions; +use crate::WalkOptions; +use crate::WalkOutcome; use crate::regular_file; use crate::sandboxed_file_system::SandboxedFileSystem; @@ -170,6 +172,16 @@ impl LocalFileSystem { file_system.read_directory(path, sandbox).await } + async fn walk( + &self, + path: &PathUri, + options: 
WalkOptions, + sandbox: Option<&FileSystemSandboxContext>, + ) -> FileSystemResult { + let (file_system, sandbox) = self.file_system_for(sandbox)?; + file_system.walk(path, options, sandbox).await + } + async fn remove( &self, path: &PathUri, @@ -255,6 +267,15 @@ impl ExecutorFileSystem for LocalFileSystem { Box::pin(LocalFileSystem::read_directory(self, path, sandbox)) } + fn walk<'a>( + &'a self, + path: &'a PathUri, + options: WalkOptions, + sandbox: Option<&'a FileSystemSandboxContext>, + ) -> ExecutorFileSystemFuture<'a, WalkOutcome> { + Box::pin(LocalFileSystem::walk(self, path, options, sandbox)) + } + fn remove<'a>( &'a self, path: &'a PathUri, diff --git a/codex-rs/exec-server/src/remote_file_system.rs b/codex-rs/exec-server/src/remote_file_system.rs index f9136e146..acc86f758 100644 --- a/codex-rs/exec-server/src/remote_file_system.rs +++ b/codex-rs/exec-server/src/remote_file_system.rs @@ -15,6 +15,8 @@ use crate::FileSystemResult; use crate::FileSystemSandboxContext; use crate::ReadDirectoryEntry; use crate::RemoveOptions; +use crate::WalkOptions; +use crate::WalkOutcome; use crate::client::LazyRemoteExecServerClient; use crate::protocol::FsCanonicalizeParams; use crate::protocol::FsCopyParams; @@ -23,6 +25,7 @@ use crate::protocol::FsGetMetadataParams; use crate::protocol::FsReadDirectoryParams; use crate::protocol::FsReadFileParams; use crate::protocol::FsRemoveParams; +use crate::protocol::FsWalkParams; use crate::protocol::FsWriteFileParams; const INVALID_REQUEST_ERROR_CODE: i64 = -32600; @@ -183,6 +186,25 @@ impl RemoteFileSystem { .collect()) } + async fn walk( + &self, + path: &PathUri, + options: WalkOptions, + sandbox: Option<&FileSystemSandboxContext>, + ) -> FileSystemResult { + trace!("remote fs walk"); + let client = self.client.get().await.map_err(map_remote_error)?; + let response = client + .fs_walk(FsWalkParams { + path: path.clone(), + options, + sandbox: remote_sandbox_context(sandbox), + }) + .await + .map_err(map_remote_error)?; + 
Ok(response) + } + async fn remove( &self, path: &PathUri, @@ -286,6 +308,15 @@ impl ExecutorFileSystem for RemoteFileSystem { Box::pin(RemoteFileSystem::read_directory(self, path, sandbox)) } + fn walk<'a>( + &'a self, + path: &'a PathUri, + options: WalkOptions, + sandbox: Option<&'a FileSystemSandboxContext>, + ) -> ExecutorFileSystemFuture<'a, WalkOutcome> { + Box::pin(RemoteFileSystem::walk(self, path, options, sandbox)) + } + fn remove<'a>( &'a self, path: &'a PathUri, diff --git a/codex-rs/exec-server/src/sandboxed_file_system.rs b/codex-rs/exec-server/src/sandboxed_file_system.rs index 81007e65e..5feefe3ba 100644 --- a/codex-rs/exec-server/src/sandboxed_file_system.rs +++ b/codex-rs/exec-server/src/sandboxed_file_system.rs @@ -15,6 +15,8 @@ use crate::FileSystemResult; use crate::FileSystemSandboxContext; use crate::ReadDirectoryEntry; use crate::RemoveOptions; +use crate::WalkOptions; +use crate::WalkOutcome; use crate::fs_helper::FsHelperPayload; use crate::fs_helper::FsHelperRequest; use crate::fs_sandbox::FileSystemSandboxRunner; @@ -25,6 +27,7 @@ use crate::protocol::FsGetMetadataParams; use crate::protocol::FsReadDirectoryParams; use crate::protocol::FsReadFileParams; use crate::protocol::FsRemoveParams; +use crate::protocol::FsWalkParams; use crate::protocol::FsWriteFileParams; #[derive(Clone)] @@ -200,6 +203,29 @@ impl SandboxedFileSystem { .collect()) } + async fn walk( + &self, + path: &PathUri, + options: WalkOptions, + sandbox: Option<&FileSystemSandboxContext>, + ) -> FileSystemResult { + let sandbox = require_platform_sandbox(sandbox)?; + validate_native_path(path)?; + let response = self + .run_sandboxed( + sandbox, + FsHelperRequest::Walk(FsWalkParams { + path: path.clone(), + options, + sandbox: None, + }), + ) + .await? 
+ .expect_walk() + .map_err(map_sandbox_error)?; + Ok(response) + } + async fn remove( &self, path: &PathUri, @@ -317,6 +343,15 @@ impl ExecutorFileSystem for SandboxedFileSystem { Box::pin(SandboxedFileSystem::read_directory(self, path, sandbox)) } + fn walk<'a>( + &'a self, + path: &'a PathUri, + options: WalkOptions, + sandbox: Option<&'a FileSystemSandboxContext>, + ) -> ExecutorFileSystemFuture<'a, WalkOutcome> { + Box::pin(SandboxedFileSystem::walk(self, path, options, sandbox)) + } + fn remove<'a>( &'a self, path: &'a PathUri, diff --git a/codex-rs/exec-server/src/server/file_system_handler.rs b/codex-rs/exec-server/src/server/file_system_handler.rs index a77b7ac71..cbf263dad 100644 --- a/codex-rs/exec-server/src/server/file_system_handler.rs +++ b/codex-rs/exec-server/src/server/file_system_handler.rs @@ -33,6 +33,8 @@ use crate::protocol::FsReadFileParams; use crate::protocol::FsReadFileResponse; use crate::protocol::FsRemoveParams; use crate::protocol::FsRemoveResponse; +use crate::protocol::FsWalkParams; +use crate::protocol::FsWalkResponse; use crate::protocol::FsWriteFileParams; use crate::protocol::FsWriteFileResponse; use crate::rpc::internal_error; @@ -198,6 +200,16 @@ impl FileSystemHandler { Ok(FsReadDirectoryResponse { entries }) } + pub(crate) async fn walk( + &self, + params: FsWalkParams, + ) -> Result { + self.file_system + .walk(&params.path, params.options, params.sandbox.as_ref()) + .await + .map_err(map_fs_error) + } + pub(crate) async fn remove( &self, params: FsRemoveParams, diff --git a/codex-rs/exec-server/src/server/handler.rs b/codex-rs/exec-server/src/server/handler.rs index 28d78a900..561cf0ff6 100644 --- a/codex-rs/exec-server/src/server/handler.rs +++ b/codex-rs/exec-server/src/server/handler.rs @@ -37,6 +37,8 @@ use crate::protocol::FsReadFileParams; use crate::protocol::FsReadFileResponse; use crate::protocol::FsRemoveParams; use crate::protocol::FsRemoveResponse; +use crate::protocol::FsWalkParams; +use 
crate::protocol::FsWalkResponse; use crate::protocol::FsWriteFileParams; use crate::protocol::FsWriteFileResponse; use crate::protocol::HttpRequestParams; @@ -311,6 +313,14 @@ impl ExecServerHandler { self.file_system.read_directory(params).await } + pub(crate) async fn fs_walk( + &self, + params: FsWalkParams, + ) -> Result { + self.require_initialized_for("filesystem")?; + self.file_system.walk(params).await + } + pub(crate) async fn fs_remove( &self, params: FsRemoveParams, diff --git a/codex-rs/exec-server/src/server/registry.rs b/codex-rs/exec-server/src/server/registry.rs index 8f48aeaf9..13acfd2a6 100644 --- a/codex-rs/exec-server/src/server/registry.rs +++ b/codex-rs/exec-server/src/server/registry.rs @@ -17,6 +17,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD; use crate::protocol::FS_READ_DIRECTORY_METHOD; use crate::protocol::FS_READ_FILE_METHOD; use crate::protocol::FS_REMOVE_METHOD; +use crate::protocol::FS_WALK_METHOD; use crate::protocol::FS_WRITE_FILE_METHOD; use crate::protocol::FsCanonicalizeParams; use crate::protocol::FsCloseParams; @@ -28,6 +29,7 @@ use crate::protocol::FsReadBlockParams; use crate::protocol::FsReadDirectoryParams; use crate::protocol::FsReadFileParams; use crate::protocol::FsRemoveParams; +use crate::protocol::FsWalkParams; use crate::protocol::FsWriteFileParams; use crate::protocol::HTTP_REQUEST_METHOD; use crate::protocol::HttpRequestParams; @@ -147,6 +149,12 @@ pub(crate) fn build_router() -> RpcRouter { handler.fs_read_directory(params).await }, ); + router.request( + FS_WALK_METHOD, + |handler: Arc, params: FsWalkParams| async move { + handler.fs_walk(params).await + }, + ); router.request( FS_REMOVE_METHOD, |handler: Arc, params: FsRemoveParams| async move { diff --git a/codex-rs/exec-server/tests/file_system/shared.rs b/codex-rs/exec-server/tests/file_system/shared.rs index 8eb17f39f..0f12cd592 100644 --- a/codex-rs/exec-server/tests/file_system/shared.rs +++ b/codex-rs/exec-server/tests/file_system/shared.rs @@ -6,6 
+6,10 @@ use codex_exec_server::FILE_READ_CHUNK_SIZE; use codex_exec_server::FileMetadata; use codex_exec_server::ReadDirectoryEntry; use codex_exec_server::RemoveOptions; +use codex_exec_server::WalkEntry; +use codex_exec_server::WalkEntryKind; +use codex_exec_server::WalkOptions; +use codex_exec_server::WalkOutcome; use codex_protocol::models::AdditionalPermissionProfile; use codex_protocol::models::FileSystemPermissions; use codex_protocol::models::PermissionProfile; @@ -373,6 +377,179 @@ async fn file_system_read_directory_lists_entries( Ok(()) } +#[test_case(FileSystemImplementation::Local ; "local")] +#[test_case(FileSystemImplementation::Remote ; "remote")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn file_system_walk_returns_a_bounded_tree( + implementation: FileSystemImplementation, +) -> Result<()> { + let context = create_file_system_context(implementation).await?; + let file_system = context.file_system; + + let tmp = TempDir::new()?; + let source_dir = tmp.path().join("source"); + let nested_dir = source_dir.join("nested"); + std::fs::create_dir_all(&nested_dir)?; + std::fs::write(source_dir.join("root.txt"), "root")?; + std::fs::write(nested_dir.join("note.txt"), "nested")?; + + let source_uri = PathUri::from_host_native_path(&source_dir)?; + let outcome = file_system + .walk( + &source_uri, + WalkOptions { + max_depth: 4, + max_directories: 10, + max_entries: 10, + }, + /*sandbox*/ None, + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + outcome, + WalkOutcome { + entries: vec![ + WalkEntry { + path: PathUri::from_host_native_path(&nested_dir)?, + kind: WalkEntryKind::Directory, + }, + WalkEntry { + path: PathUri::from_host_native_path(source_dir.join("root.txt"))?, + kind: WalkEntryKind::File, + }, + WalkEntry { + path: PathUri::from_host_native_path(nested_dir.join("note.txt"))?, + kind: WalkEntryKind::File, + }, + ], + errors: Vec::new(), + truncated: false, + } + ); + + let root_entries = 
vec![ + WalkEntry { + path: PathUri::from_host_native_path(&nested_dir)?, + kind: WalkEntryKind::Directory, + }, + WalkEntry { + path: PathUri::from_host_native_path(source_dir.join("root.txt"))?, + kind: WalkEntryKind::File, + }, + ]; + let shallow = file_system + .walk( + &source_uri, + WalkOptions { + max_depth: 0, + max_directories: 10, + max_entries: 10, + }, + /*sandbox*/ None, + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + shallow, + WalkOutcome { + entries: root_entries.clone(), + errors: Vec::new(), + truncated: false, + } + ); + + let directory_bounded = file_system + .walk( + &source_uri, + WalkOptions { + max_depth: 4, + max_directories: 1, + max_entries: 10, + }, + /*sandbox*/ None, + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + directory_bounded, + WalkOutcome { + entries: root_entries, + errors: Vec::new(), + truncated: true, + } + ); + + let bounded = file_system + .walk( + &source_uri, + WalkOptions { + max_depth: 4, + max_directories: 10, + max_entries: 1, + }, + /*sandbox*/ None, + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + bounded, + WalkOutcome { + entries: vec![WalkEntry { + path: PathUri::from_host_native_path(&nested_dir)?, + kind: WalkEntryKind::Directory, + }], + errors: Vec::new(), + truncated: true, + } + ); + + Ok(()) +} + +#[test_case(FileSystemImplementation::Local ; "local")] +#[test_case(FileSystemImplementation::Remote ; "remote")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn file_system_walk_honors_read_sandbox( + implementation: FileSystemImplementation, +) -> Result<()> { + let context = create_file_system_context(implementation).await?; + let file_system = context.file_system; + + let tmp = TempDir::new()?; + let source_dir = tmp.path().join("source"); + let file_path = source_dir.join("note.txt"); + std::fs::create_dir_all(&source_dir)?; + std::fs::write(&file_path, "sandboxed")?; + let 
sandbox = read_only_sandbox(source_dir.clone()); + + let outcome = file_system + .walk( + &PathUri::from_host_native_path(&source_dir)?, + WalkOptions { + max_depth: 1, + max_directories: 2, + max_entries: 2, + }, + Some(&sandbox), + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + outcome, + WalkOutcome { + entries: vec![WalkEntry { + path: PathUri::from_host_native_path(file_path)?, + kind: WalkEntryKind::File, + }], + errors: Vec::new(), + truncated: false, + } + ); + + Ok(()) +} + #[test_case(FileSystemImplementation::Local ; "local")] #[test_case(FileSystemImplementation::Remote ; "remote")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/codex-rs/exec-server/tests/file_system_unix.rs b/codex-rs/exec-server/tests/file_system_unix.rs index 0287844a7..320f52108 100644 --- a/codex-rs/exec-server/tests/file_system_unix.rs +++ b/codex-rs/exec-server/tests/file_system_unix.rs @@ -24,6 +24,10 @@ use codex_exec_server::CreateDirectoryOptions; use codex_exec_server::Environment; use codex_exec_server::FileMetadata; use codex_exec_server::RemoveOptions; +use codex_exec_server::WalkEntry; +use codex_exec_server::WalkEntryKind; +use codex_exec_server::WalkOptions; +use codex_exec_server::WalkOutcome; use codex_utils_path_uri::PathUri; use pretty_assertions::assert_eq; use tempfile::TempDir; @@ -266,6 +270,54 @@ async fn file_system_get_metadata_reports_symlink_targets( Ok(()) } +#[test_case(FileSystemImplementation::Local ; "local")] +#[test_case(FileSystemImplementation::Remote ; "remote")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn file_system_walk_ignores_symlinks(implementation: FileSystemImplementation) -> Result<()> { + let context = create_file_system_context(implementation).await?; + let file_system = context.file_system; + + let tmp = TempDir::new()?; + let root = tmp.path().join("root"); + let target = root.join("target"); + let target_file = target.join("note.txt"); + 
std::fs::create_dir_all(&target)?; + std::fs::write(&target_file, "target")?; + symlink(&target, root.join("target-link"))?; + + let outcome = file_system + .walk( + &PathUri::from_host_native_path(&root)?, + WalkOptions { + max_depth: 2, + max_directories: 4, + max_entries: 8, + }, + /*sandbox*/ None, + ) + .await + .with_context(|| format!("mode={implementation}"))?; + assert_eq!( + outcome, + WalkOutcome { + entries: vec![ + WalkEntry { + path: PathUri::from_host_native_path(&target)?, + kind: WalkEntryKind::Directory, + }, + WalkEntry { + path: PathUri::from_host_native_path(target_file)?, + kind: WalkEntryKind::File, + }, + ], + errors: Vec::new(), + truncated: false, + } + ); + + Ok(()) +} + #[test_case(FileSystemImplementation::Local ; "local")] #[test_case(FileSystemImplementation::Remote ; "remote")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/codex-rs/file-system/src/lib.rs b/codex-rs/file-system/src/lib.rs index 3ba787440..3ae4b5675 100644 --- a/codex-rs/file-system/src/lib.rs +++ b/codex-rs/file-system/src/lib.rs @@ -12,6 +12,7 @@ use codex_protocol::protocol::SandboxPolicy; use codex_utils_absolute_path::AbsolutePathBuf; use codex_utils_path_uri::PathUri; use futures::Stream; +use std::collections::VecDeque; use std::future::Future; use std::io; use std::path::Path; @@ -21,6 +22,11 @@ use std::task::Poll; /// Maximum chunk size returned by [`ExecutorFileSystem::read_file_stream`]. pub const FILE_READ_CHUNK_SIZE: usize = 1024 * 1024; +const MAX_WALK_DEPTH: usize = 64; +const MAX_WALK_DIRECTORIES: usize = 10_000; +const MAX_WALK_ENTRIES: usize = 50_000; +const MAX_WALK_RESPONSE_BYTES: usize = 4 * 1024 * 1024; +const WALK_RESPONSE_ITEM_OVERHEAD_BYTES: usize = 64; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct CreateDirectoryOptions { @@ -56,6 +62,51 @@ pub struct ReadDirectoryEntry { pub is_file: bool, } +/// Bounds for a recursive filesystem walk. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct WalkOptions { + /// Maximum directory depth below the root that may be traversed. + pub max_depth: usize, + /// Maximum number of directories that may be traversed, including the root. + pub max_directories: usize, + /// Maximum number of directory entries that may be examined. + pub max_entries: usize, +} + +/// Type of a filesystem entry returned by a walk. +#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub enum WalkEntryKind { + Directory, + File, +} + +/// One non-symlink entry returned by a walk. +#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct WalkEntry { + pub path: PathUri, + pub kind: WalkEntryKind, +} + +/// A descendant that could not be inspected during a walk. +#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct WalkError { + pub path: PathUri, + pub message: String, +} + +/// Entries and recoverable errors collected by a bounded walk. +#[derive(Clone, Debug, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct WalkOutcome { + pub entries: Vec<WalkEntry>, + pub errors: Vec<WalkError>, + pub truncated: bool, +} + #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct FileSystemSandboxContext { @@ -249,6 +300,16 @@ pub trait ExecutorFileSystem: Send + Sync { sandbox: Option<&'a FileSystemSandboxContext>, ) -> ExecutorFileSystemFuture<'a, Vec<ReadDirectoryEntry>>; + /// Recursively lists descendants, skipping symlinks. 
+ fn walk<'a>( + &'a self, + path: &'a PathUri, + options: WalkOptions, + sandbox: Option<&'a FileSystemSandboxContext>, + ) -> ExecutorFileSystemFuture<'a, WalkOutcome> { + Box::pin(walk(self, path, options, sandbox)) + } + fn remove<'a>( &'a self, path: &'a PathUri, @@ -264,3 +325,155 @@ pub trait ExecutorFileSystem: Send + Sync { sandbox: Option<&'a FileSystemSandboxContext>, ) -> ExecutorFileSystemFuture<'a, ()>; } + +async fn walk<F: ExecutorFileSystem + ?Sized>( + file_system: &F, + root: &PathUri, + options: WalkOptions, + sandbox: Option<&FileSystemSandboxContext>, +) -> FileSystemResult<WalkOutcome> { + if options.max_directories == 0 || options.max_entries == 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "filesystem walk limits must be greater than zero", + )); + } + if options.max_depth > MAX_WALK_DEPTH + || options.max_directories > MAX_WALK_DIRECTORIES + || options.max_entries > MAX_WALK_ENTRIES + { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "filesystem walk limits exceed maximums: depth={MAX_WALK_DEPTH}, directories={MAX_WALK_DIRECTORIES}, entries={MAX_WALK_ENTRIES}" + ), + )); + } + + let root_metadata = file_system.get_metadata(root, sandbox).await?; + if root_metadata.is_symlink || !root_metadata.is_directory { + return Ok(WalkOutcome::default()); + } + + let mut outcome = WalkOutcome::default(); + let mut queue = VecDeque::from([(root.clone(), 0usize)]); + let mut directory_count = 1usize; + let mut entry_count = 0usize; + let mut response_bytes = 0usize; + + while let Some((directory, depth)) = queue.pop_front() { + let mut entries = match file_system.read_directory(&directory, sandbox).await { + Ok(entries) => entries, + Err(error) => { + if !push_walk_error( + &mut outcome, + &mut response_bytes, + directory, + error.to_string(), + ) { + return Ok(outcome); + } + continue; + } + }; + entries.sort_by(|left, right| left.file_name.cmp(&right.file_name)); + + for entry in entries { + if entry_count == options.max_entries { + outcome.truncated 
= true; + return Ok(outcome); + } + entry_count += 1; + + let path = match directory.join(&entry.file_name) { + Ok(path) => path, + Err(error) => { + if !push_walk_error( + &mut outcome, + &mut response_bytes, + directory.clone(), + error.to_string(), + ) { + return Ok(outcome); + } + continue; + } + }; + let metadata = match file_system.get_metadata(&path, sandbox).await { + Ok(metadata) => metadata, + Err(error) => { + if !push_walk_error(&mut outcome, &mut response_bytes, path, error.to_string()) + { + return Ok(outcome); + } + continue; + } + }; + if metadata.is_symlink { + continue; + } + + let kind = if metadata.is_directory { + WalkEntryKind::Directory + } else if metadata.is_file { + WalkEntryKind::File + } else { + continue; + }; + if !reserve_walk_response_bytes( + &mut outcome, + &mut response_bytes, + path.to_string().len(), + ) { + return Ok(outcome); + } + outcome.entries.push(WalkEntry { + path: path.clone(), + kind, + }); + + if kind == WalkEntryKind::Directory && depth < options.max_depth { + if directory_count == options.max_directories { + outcome.truncated = true; + } else { + directory_count += 1; + queue.push_back((path, depth + 1)); + } + } + } + } + + Ok(outcome) +} + +fn push_walk_error( + outcome: &mut WalkOutcome, + response_bytes: &mut usize, + path: PathUri, + message: String, +) -> bool { + let item_bytes = path.to_string().len().saturating_add(message.len()); + if !reserve_walk_response_bytes(outcome, response_bytes, item_bytes) { + return false; + } + outcome.errors.push(WalkError { path, message }); + true +} + +fn reserve_walk_response_bytes( + outcome: &mut WalkOutcome, + response_bytes: &mut usize, + content_bytes: usize, +) -> bool { + let item_bytes = content_bytes.saturating_add(WALK_RESPONSE_ITEM_OVERHEAD_BYTES); + let Some(total_bytes) = response_bytes.checked_add(item_bytes) else { + outcome.truncated = true; + return false; + }; + if total_bytes > MAX_WALK_RESPONSE_BYTES { + outcome.truncated = true; + return false; + } + 
*response_bytes = total_bytes; + true +}