Add a bounded filesystem walk RPC (#29841)

Stack 1 of 3. Follow-ups: #29842 and #29844.

## What changes

Adds a general bounded `fs/walk` operation to the exec server.

The operation returns file and directory entries plus recoverable
per-path errors. It skips symlinks, preserves the existing filesystem
sandbox routing, and enforces depth, directory, entry, and response-size
limits.

This PR only defines and wires the filesystem operation. It does not
change any callers yet.
This commit is contained in:
jif
2026-06-24 16:05:43 +01:00
committed by GitHub
Unverified
parent b4f0f3eff1
commit c14623d04c
13 changed files with 608 additions and 0 deletions
@@ -2,6 +2,8 @@ use std::collections::HashMap;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_file_system::FileSystemSandboxContext;
pub use codex_file_system::WalkOptions;
pub use codex_file_system::WalkOutcome;
use codex_network_proxy::ManagedNetworkSandboxContext;
use codex_protocol::config_types::ShellEnvironmentPolicyInherit;
use codex_shell_command::shell_detect::DetectedShell;
@@ -31,6 +33,7 @@ pub const FS_CREATE_DIRECTORY_METHOD: &str = "fs/createDirectory";
pub const FS_GET_METADATA_METHOD: &str = "fs/getMetadata";
pub const FS_CANONICALIZE_METHOD: &str = "fs/canonicalize";
pub const FS_READ_DIRECTORY_METHOD: &str = "fs/readDirectory";
pub const FS_WALK_METHOD: &str = "fs/walk";
pub const FS_REMOVE_METHOD: &str = "fs/remove";
pub const FS_COPY_METHOD: &str = "fs/copy";
/// JSON-RPC request method for executor-side HTTP requests.
@@ -370,6 +373,16 @@ pub struct FsReadDirectoryResponse {
pub entries: Vec<FsReadDirectoryEntry>,
}
/// Parameters of the `fs/walk` request.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FsWalkParams {
/// Root path at which the walk starts.
pub path: PathUri,
/// Depth, directory, and entry bounds applied to the walk.
pub options: WalkOptions,
/// Optional sandbox context handed to the filesystem together with the walk.
pub sandbox: Option<FileSystemSandboxContext>,
}
/// Response of the `fs/walk` request; the walk outcome is used as the payload unchanged.
pub type FsWalkResponse = WalkOutcome;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FsRemoveParams {
+7
View File
@@ -58,6 +58,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD;
use crate::protocol::FS_READ_DIRECTORY_METHOD;
use crate::protocol::FS_READ_FILE_METHOD;
use crate::protocol::FS_REMOVE_METHOD;
use crate::protocol::FS_WALK_METHOD;
use crate::protocol::FS_WRITE_FILE_METHOD;
use crate::protocol::FsCanonicalizeParams;
use crate::protocol::FsCanonicalizeResponse;
@@ -79,6 +80,8 @@ use crate::protocol::FsReadFileParams;
use crate::protocol::FsReadFileResponse;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsRemoveResponse;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWalkResponse;
use crate::protocol::FsWriteFileParams;
use crate::protocol::FsWriteFileResponse;
use crate::protocol::HTTP_REQUEST_BODY_DELTA_METHOD;
@@ -620,6 +623,10 @@ impl ExecServerClient {
self.call(FS_READ_DIRECTORY_METHOD, &params).await
}
/// Issues an `fs/walk` request with `params` and returns the decoded walk outcome.
///
/// # Errors
///
/// Returns whatever `ExecServerError` the underlying `call` produces.
pub async fn fs_walk(&self, params: FsWalkParams) -> Result<FsWalkResponse, ExecServerError> {
    let response: Result<FsWalkResponse, ExecServerError> =
        self.call(FS_WALK_METHOD, &params).await;
    response
}
pub async fn fs_remove(
&self,
params: FsRemoveParams,
+22
View File
@@ -17,6 +17,7 @@ use crate::protocol::FS_GET_METADATA_METHOD;
use crate::protocol::FS_READ_DIRECTORY_METHOD;
use crate::protocol::FS_READ_FILE_METHOD;
use crate::protocol::FS_REMOVE_METHOD;
use crate::protocol::FS_WALK_METHOD;
use crate::protocol::FS_WRITE_FILE_METHOD;
use crate::protocol::FsCanonicalizeParams;
use crate::protocol::FsCanonicalizeResponse;
@@ -33,6 +34,8 @@ use crate::protocol::FsReadFileParams;
use crate::protocol::FsReadFileResponse;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsRemoveResponse;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWalkResponse;
use crate::protocol::FsWriteFileParams;
use crate::protocol::FsWriteFileResponse;
use crate::rpc::internal_error;
@@ -56,6 +59,8 @@ pub(crate) enum FsHelperRequest {
Canonicalize(FsCanonicalizeParams),
#[serde(rename = "fs/readDirectory")]
ReadDirectory(FsReadDirectoryParams),
#[serde(rename = "fs/walk")]
Walk(FsWalkParams),
#[serde(rename = "fs/remove")]
Remove(FsRemoveParams),
#[serde(rename = "fs/copy")]
@@ -84,6 +89,8 @@ pub(crate) enum FsHelperPayload {
Canonicalize(FsCanonicalizeResponse),
#[serde(rename = "fs/readDirectory")]
ReadDirectory(FsReadDirectoryResponse),
#[serde(rename = "fs/walk")]
Walk(FsWalkResponse),
#[serde(rename = "fs/remove")]
Remove(FsRemoveResponse),
#[serde(rename = "fs/copy")]
@@ -99,6 +106,7 @@ impl FsHelperPayload {
Self::GetMetadata(_) => FS_GET_METADATA_METHOD,
Self::Canonicalize(_) => FS_CANONICALIZE_METHOD,
Self::ReadDirectory(_) => FS_READ_DIRECTORY_METHOD,
Self::Walk(_) => FS_WALK_METHOD,
Self::Remove(_) => FS_REMOVE_METHOD,
Self::Copy(_) => FS_COPY_METHOD,
}
@@ -162,6 +170,13 @@ impl FsHelperPayload {
}
}
/// Unwraps the payload as a walk response.
///
/// # Errors
///
/// Returns an "unexpected response" error naming the operation that was actually
/// received when the payload is any variant other than `Walk`.
pub(crate) fn expect_walk(self) -> Result<FsWalkResponse, JSONRPCErrorError> {
    if let Self::Walk(response) = self {
        return Ok(response);
    }
    // Not a walk payload: report which operation came back instead.
    Err(unexpected_response(FS_WALK_METHOD, self.operation()))
}
pub(crate) fn expect_remove(self) -> Result<FsRemoveResponse, JSONRPCErrorError> {
match self {
Self::Remove(response) => Ok(response),
@@ -263,6 +278,13 @@ pub(crate) async fn run_direct_request(
entries,
}))
}
FsHelperRequest::Walk(params) => {
let outcome = file_system
.walk(&params.path, params.options, /*sandbox*/ None)
.await
.map_err(map_fs_error)?;
Ok(FsHelperPayload::Walk(outcome))
}
FsHelperRequest::Remove(params) => {
file_system
.remove(
+7
View File
@@ -51,6 +51,11 @@ pub use codex_file_system::FileSystemResult;
pub use codex_file_system::FileSystemSandboxContext;
pub use codex_file_system::ReadDirectoryEntry;
pub use codex_file_system::RemoveOptions;
pub use codex_file_system::WalkEntry;
pub use codex_file_system::WalkEntryKind;
pub use codex_file_system::WalkError;
pub use codex_file_system::WalkOptions;
pub use codex_file_system::WalkOutcome;
pub use environment::CODEX_EXEC_SERVER_NOISE_AUTH_TOKEN_ENV_VAR;
pub use environment::CODEX_EXEC_SERVER_NOISE_CHATGPT_ACCOUNT_ID_ENV_VAR;
pub use environment::CODEX_EXEC_SERVER_NOISE_ENVIRONMENT_ID_ENV_VAR;
@@ -113,6 +118,8 @@ pub use protocol::FsReadFileParams;
pub use protocol::FsReadFileResponse;
pub use protocol::FsRemoveParams;
pub use protocol::FsRemoveResponse;
pub use protocol::FsWalkParams;
pub use protocol::FsWalkResponse;
pub use protocol::FsWriteFileParams;
pub use protocol::FsWriteFileResponse;
pub use protocol::HttpHeader;
@@ -22,6 +22,8 @@ use crate::FileSystemResult;
use crate::FileSystemSandboxContext;
use crate::ReadDirectoryEntry;
use crate::RemoveOptions;
use crate::WalkOptions;
use crate::WalkOutcome;
use crate::regular_file;
use crate::sandboxed_file_system::SandboxedFileSystem;
@@ -170,6 +172,16 @@ impl LocalFileSystem {
file_system.read_directory(path, sandbox).await
}
/// Walks `path` using whichever filesystem `file_system_for` selects for `sandbox`.
///
/// # Errors
///
/// Fails when no filesystem can be selected for `sandbox`, or when the selected
/// filesystem's walk fails.
async fn walk(
    &self,
    path: &PathUri,
    options: WalkOptions,
    sandbox: Option<&FileSystemSandboxContext>,
) -> FileSystemResult<WalkOutcome> {
    let (delegate, effective_sandbox) = self.file_system_for(sandbox)?;
    let outcome = delegate.walk(path, options, effective_sandbox).await?;
    Ok(outcome)
}
async fn remove(
&self,
path: &PathUri,
@@ -255,6 +267,15 @@ impl ExecutorFileSystem for LocalFileSystem {
Box::pin(LocalFileSystem::read_directory(self, path, sandbox))
}
/// Trait entry point: boxes the inherent `LocalFileSystem::walk` future.
fn walk<'a>(
    &'a self,
    path: &'a PathUri,
    options: WalkOptions,
    sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
    // Name the inherent method explicitly so this does not resolve to the trait method.
    let future = LocalFileSystem::walk(self, path, options, sandbox);
    Box::pin(future)
}
fn remove<'a>(
&'a self,
path: &'a PathUri,
@@ -15,6 +15,8 @@ use crate::FileSystemResult;
use crate::FileSystemSandboxContext;
use crate::ReadDirectoryEntry;
use crate::RemoveOptions;
use crate::WalkOptions;
use crate::WalkOutcome;
use crate::client::LazyRemoteExecServerClient;
use crate::protocol::FsCanonicalizeParams;
use crate::protocol::FsCopyParams;
@@ -23,6 +25,7 @@ use crate::protocol::FsGetMetadataParams;
use crate::protocol::FsReadDirectoryParams;
use crate::protocol::FsReadFileParams;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWriteFileParams;
const INVALID_REQUEST_ERROR_CODE: i64 = -32600;
@@ -183,6 +186,25 @@ impl RemoteFileSystem {
.collect())
}
/// Forwards the walk to the remote exec server as an `fs/walk` request.
///
/// # Errors
///
/// Fails when the remote client cannot be obtained or when the request fails;
/// both failures are converted with `map_remote_error`.
async fn walk(
    &self,
    path: &PathUri,
    options: WalkOptions,
    sandbox: Option<&FileSystemSandboxContext>,
) -> FileSystemResult<WalkOutcome> {
    trace!("remote fs walk");
    let client = self.client.get().await.map_err(map_remote_error)?;
    // Build the request only after the client is available, as before.
    let request = FsWalkParams {
        path: path.clone(),
        options,
        sandbox: remote_sandbox_context(sandbox),
    };
    let outcome = client.fs_walk(request).await.map_err(map_remote_error)?;
    Ok(outcome)
}
async fn remove(
&self,
path: &PathUri,
@@ -286,6 +308,15 @@ impl ExecutorFileSystem for RemoteFileSystem {
Box::pin(RemoteFileSystem::read_directory(self, path, sandbox))
}
/// Trait entry point: boxes the inherent `RemoteFileSystem::walk` future.
fn walk<'a>(
    &'a self,
    path: &'a PathUri,
    options: WalkOptions,
    sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
    // Name the inherent method explicitly so this does not resolve to the trait method.
    let future = RemoteFileSystem::walk(self, path, options, sandbox);
    Box::pin(future)
}
fn remove<'a>(
&'a self,
path: &'a PathUri,
@@ -15,6 +15,8 @@ use crate::FileSystemResult;
use crate::FileSystemSandboxContext;
use crate::ReadDirectoryEntry;
use crate::RemoveOptions;
use crate::WalkOptions;
use crate::WalkOutcome;
use crate::fs_helper::FsHelperPayload;
use crate::fs_helper::FsHelperRequest;
use crate::fs_sandbox::FileSystemSandboxRunner;
@@ -25,6 +27,7 @@ use crate::protocol::FsGetMetadataParams;
use crate::protocol::FsReadDirectoryParams;
use crate::protocol::FsReadFileParams;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWriteFileParams;
#[derive(Clone)]
@@ -200,6 +203,29 @@ impl SandboxedFileSystem {
.collect())
}
/// Runs the walk inside the platform sandbox via the filesystem helper.
///
/// The helper request itself carries no sandbox context (`sandbox: None`); the
/// sandbox is passed to `run_sandboxed` instead.
///
/// # Errors
///
/// Fails when no platform sandbox is supplied, when `path` is rejected by
/// `validate_native_path`, when the sandboxed run fails, or when the helper
/// answers with a payload other than a walk response.
async fn walk(
    &self,
    path: &PathUri,
    options: WalkOptions,
    sandbox: Option<&FileSystemSandboxContext>,
) -> FileSystemResult<WalkOutcome> {
    let platform_sandbox = require_platform_sandbox(sandbox)?;
    validate_native_path(path)?;

    let request = FsHelperRequest::Walk(FsWalkParams {
        path: path.clone(),
        options,
        sandbox: None,
    });
    let payload = self.run_sandboxed(platform_sandbox, request).await?;
    let outcome = payload.expect_walk().map_err(map_sandbox_error)?;
    Ok(outcome)
}
async fn remove(
&self,
path: &PathUri,
@@ -317,6 +343,15 @@ impl ExecutorFileSystem for SandboxedFileSystem {
Box::pin(SandboxedFileSystem::read_directory(self, path, sandbox))
}
/// Trait entry point: boxes the inherent `SandboxedFileSystem::walk` future.
fn walk<'a>(
    &'a self,
    path: &'a PathUri,
    options: WalkOptions,
    sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
    // Name the inherent method explicitly so this does not resolve to the trait method.
    let future = SandboxedFileSystem::walk(self, path, options, sandbox);
    Box::pin(future)
}
fn remove<'a>(
&'a self,
path: &'a PathUri,
@@ -33,6 +33,8 @@ use crate::protocol::FsReadFileParams;
use crate::protocol::FsReadFileResponse;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsRemoveResponse;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWalkResponse;
use crate::protocol::FsWriteFileParams;
use crate::protocol::FsWriteFileResponse;
use crate::rpc::internal_error;
@@ -198,6 +200,16 @@ impl FileSystemHandler {
Ok(FsReadDirectoryResponse { entries })
}
pub(crate) async fn walk(
&self,
params: FsWalkParams,
) -> Result<FsWalkResponse, JSONRPCErrorError> {
self.file_system
.walk(&params.path, params.options, params.sandbox.as_ref())
.await
.map_err(map_fs_error)
}
pub(crate) async fn remove(
&self,
params: FsRemoveParams,
@@ -37,6 +37,8 @@ use crate::protocol::FsReadFileParams;
use crate::protocol::FsReadFileResponse;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsRemoveResponse;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWalkResponse;
use crate::protocol::FsWriteFileParams;
use crate::protocol::FsWriteFileResponse;
use crate::protocol::HttpRequestParams;
@@ -311,6 +313,14 @@ impl ExecServerHandler {
self.file_system.read_directory(params).await
}
/// Handles `fs/walk`: checks initialization first, then delegates to the filesystem handler.
///
/// # Errors
///
/// Returns the error from `require_initialized_for("filesystem")` when that check
/// fails, otherwise any error produced by the filesystem handler.
pub(crate) async fn fs_walk(
    &self,
    params: FsWalkParams,
) -> Result<FsWalkResponse, JSONRPCErrorError> {
    self.require_initialized_for("filesystem")?;
    let response = self.file_system.walk(params).await?;
    Ok(response)
}
pub(crate) async fn fs_remove(
&self,
params: FsRemoveParams,
@@ -17,6 +17,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD;
use crate::protocol::FS_READ_DIRECTORY_METHOD;
use crate::protocol::FS_READ_FILE_METHOD;
use crate::protocol::FS_REMOVE_METHOD;
use crate::protocol::FS_WALK_METHOD;
use crate::protocol::FS_WRITE_FILE_METHOD;
use crate::protocol::FsCanonicalizeParams;
use crate::protocol::FsCloseParams;
@@ -28,6 +29,7 @@ use crate::protocol::FsReadBlockParams;
use crate::protocol::FsReadDirectoryParams;
use crate::protocol::FsReadFileParams;
use crate::protocol::FsRemoveParams;
use crate::protocol::FsWalkParams;
use crate::protocol::FsWriteFileParams;
use crate::protocol::HTTP_REQUEST_METHOD;
use crate::protocol::HttpRequestParams;
@@ -147,6 +149,12 @@ pub(crate) fn build_router() -> RpcRouter<ExecServerHandler> {
handler.fs_read_directory(params).await
},
);
router.request(
FS_WALK_METHOD,
|handler: Arc<ExecServerHandler>, params: FsWalkParams| async move {
handler.fs_walk(params).await
},
);
router.request(
FS_REMOVE_METHOD,
|handler: Arc<ExecServerHandler>, params: FsRemoveParams| async move {
@@ -6,6 +6,10 @@ use codex_exec_server::FILE_READ_CHUNK_SIZE;
use codex_exec_server::FileMetadata;
use codex_exec_server::ReadDirectoryEntry;
use codex_exec_server::RemoveOptions;
use codex_exec_server::WalkEntry;
use codex_exec_server::WalkEntryKind;
use codex_exec_server::WalkOptions;
use codex_exec_server::WalkOutcome;
use codex_protocol::models::AdditionalPermissionProfile;
use codex_protocol::models::FileSystemPermissions;
use codex_protocol::models::PermissionProfile;
@@ -373,6 +377,179 @@ async fn file_system_read_directory_lists_entries(
Ok(())
}
// Exercises the walk against a two-level fixture under four different limit settings:
// generous limits, zero depth, a single-directory budget, and a single-entry budget.
#[test_case(FileSystemImplementation::Local ; "local")]
#[test_case(FileSystemImplementation::Remote ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn file_system_walk_returns_a_bounded_tree(
implementation: FileSystemImplementation,
) -> Result<()> {
let context = create_file_system_context(implementation).await?;
let file_system = context.file_system;
// Fixture layout: source/root.txt and source/nested/note.txt.
let tmp = TempDir::new()?;
let source_dir = tmp.path().join("source");
let nested_dir = source_dir.join("nested");
std::fs::create_dir_all(&nested_dir)?;
std::fs::write(source_dir.join("root.txt"), "root")?;
std::fs::write(nested_dir.join("note.txt"), "nested")?;
let source_uri = PathUri::from_host_native_path(&source_dir)?;
// Case 1: limits large enough for the whole tree.
let outcome = file_system
.walk(
&source_uri,
WalkOptions {
max_depth: 4,
max_directories: 10,
max_entries: 10,
},
/*sandbox*/ None,
)
.await
.with_context(|| format!("mode={implementation}"))?;
// Expected order: both root-level entries first, then the nested file; nothing truncated.
assert_eq!(
outcome,
WalkOutcome {
entries: vec![
WalkEntry {
path: PathUri::from_host_native_path(&nested_dir)?,
kind: WalkEntryKind::Directory,
},
WalkEntry {
path: PathUri::from_host_native_path(source_dir.join("root.txt"))?,
kind: WalkEntryKind::File,
},
WalkEntry {
path: PathUri::from_host_native_path(nested_dir.join("note.txt"))?,
kind: WalkEntryKind::File,
},
],
errors: Vec::new(),
truncated: false,
}
);
// The root-level listing is shared by the depth-bounded and directory-bounded cases below.
let root_entries = vec![
WalkEntry {
path: PathUri::from_host_native_path(&nested_dir)?,
kind: WalkEntryKind::Directory,
},
WalkEntry {
path: PathUri::from_host_native_path(source_dir.join("root.txt"))?,
kind: WalkEntryKind::File,
},
];
// Case 2: max_depth of 0 lists the root's entries only and is not reported as truncated.
let shallow = file_system
.walk(
&source_uri,
WalkOptions {
max_depth: 0,
max_directories: 10,
max_entries: 10,
},
/*sandbox*/ None,
)
.await
.with_context(|| format!("mode={implementation}"))?;
assert_eq!(
shallow,
WalkOutcome {
entries: root_entries.clone(),
errors: Vec::new(),
truncated: false,
}
);
// Case 3: a directory budget of 1 yields the same listing, but flagged as truncated.
let directory_bounded = file_system
.walk(
&source_uri,
WalkOptions {
max_depth: 4,
max_directories: 1,
max_entries: 10,
},
/*sandbox*/ None,
)
.await
.with_context(|| format!("mode={implementation}"))?;
assert_eq!(
directory_bounded,
WalkOutcome {
entries: root_entries,
errors: Vec::new(),
truncated: true,
}
);
// Case 4: an entry budget of 1 returns only the first entry and is flagged as truncated.
let bounded = file_system
.walk(
&source_uri,
WalkOptions {
max_depth: 4,
max_directories: 10,
max_entries: 1,
},
/*sandbox*/ None,
)
.await
.with_context(|| format!("mode={implementation}"))?;
assert_eq!(
bounded,
WalkOutcome {
entries: vec![WalkEntry {
path: PathUri::from_host_native_path(&nested_dir)?,
kind: WalkEntryKind::Directory,
}],
errors: Vec::new(),
truncated: true,
}
);
Ok(())
}
// Walks a directory while passing a read-only sandbox for that same directory and
// expects the single file inside it to be listed without errors or truncation.
#[test_case(FileSystemImplementation::Local ; "local")]
#[test_case(FileSystemImplementation::Remote ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn file_system_walk_honors_read_sandbox(
    implementation: FileSystemImplementation,
) -> Result<()> {
    // Keep `context` bound for the whole test; only its filesystem is moved out.
    let context = create_file_system_context(implementation).await?;
    let file_system = context.file_system;

    // Fixture layout: source/note.txt.
    let tmp = TempDir::new()?;
    let source_dir = tmp.path().join("source");
    let file_path = source_dir.join("note.txt");
    std::fs::create_dir_all(&source_dir)?;
    std::fs::write(&file_path, "sandboxed")?;

    let sandbox = read_only_sandbox(source_dir.clone());
    let root_uri = PathUri::from_host_native_path(&source_dir)?;
    let limits = WalkOptions {
        max_depth: 1,
        max_directories: 2,
        max_entries: 2,
    };

    let outcome = file_system
        .walk(&root_uri, limits, Some(&sandbox))
        .await
        .with_context(|| format!("mode={implementation}"))?;

    let expected = WalkOutcome {
        entries: vec![WalkEntry {
            path: PathUri::from_host_native_path(file_path)?,
            kind: WalkEntryKind::File,
        }],
        errors: Vec::new(),
        truncated: false,
    };
    assert_eq!(outcome, expected);

    Ok(())
}
#[test_case(FileSystemImplementation::Local ; "local")]
#[test_case(FileSystemImplementation::Remote ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -24,6 +24,10 @@ use codex_exec_server::CreateDirectoryOptions;
use codex_exec_server::Environment;
use codex_exec_server::FileMetadata;
use codex_exec_server::RemoveOptions;
use codex_exec_server::WalkEntry;
use codex_exec_server::WalkEntryKind;
use codex_exec_server::WalkOptions;
use codex_exec_server::WalkOutcome;
use codex_utils_path_uri::PathUri;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
@@ -266,6 +270,54 @@ async fn file_system_get_metadata_reports_symlink_targets(
Ok(())
}
// Walks a directory that contains a real subdirectory plus a symlink to it, and
// expects only the real directory and its file to be reported.
#[test_case(FileSystemImplementation::Local ; "local")]
#[test_case(FileSystemImplementation::Remote ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn file_system_walk_ignores_symlinks(implementation: FileSystemImplementation) -> Result<()> {
    // Keep `context` bound for the whole test; only its filesystem is moved out.
    let context = create_file_system_context(implementation).await?;
    let file_system = context.file_system;

    // Fixture layout: root/target/note.txt and root/target-link -> root/target.
    let tmp = TempDir::new()?;
    let root = tmp.path().join("root");
    let target = root.join("target");
    let target_file = target.join("note.txt");
    std::fs::create_dir_all(&target)?;
    std::fs::write(&target_file, "target")?;
    symlink(&target, root.join("target-link"))?;

    let root_uri = PathUri::from_host_native_path(&root)?;
    let limits = WalkOptions {
        max_depth: 2,
        max_directories: 4,
        max_entries: 8,
    };

    let outcome = file_system
        .walk(&root_uri, limits, /*sandbox*/ None)
        .await
        .with_context(|| format!("mode={implementation}"))?;

    // The symlink appears neither as an entry nor as a second copy of note.txt.
    let expected = WalkOutcome {
        entries: vec![
            WalkEntry {
                path: PathUri::from_host_native_path(&target)?,
                kind: WalkEntryKind::Directory,
            },
            WalkEntry {
                path: PathUri::from_host_native_path(target_file)?,
                kind: WalkEntryKind::File,
            },
        ],
        errors: Vec::new(),
        truncated: false,
    };
    assert_eq!(outcome, expected);

    Ok(())
}
#[test_case(FileSystemImplementation::Local ; "local")]
#[test_case(FileSystemImplementation::Remote ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+213
View File
@@ -12,6 +12,7 @@ use codex_protocol::protocol::SandboxPolicy;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_path_uri::PathUri;
use futures::Stream;
use std::collections::VecDeque;
use std::future::Future;
use std::io;
use std::path::Path;
@@ -21,6 +22,11 @@ use std::task::Poll;
/// Maximum chunk size returned by [`ExecutorFileSystem::read_file_stream`].
pub const FILE_READ_CHUNK_SIZE: usize = 1024 * 1024;
/// Largest `WalkOptions::max_depth` accepted by the shared `walk` implementation.
const MAX_WALK_DEPTH: usize = 64;
/// Largest `WalkOptions::max_directories` accepted by the shared `walk` implementation.
const MAX_WALK_DIRECTORIES: usize = 10_000;
/// Largest `WalkOptions::max_entries` accepted by the shared `walk` implementation.
const MAX_WALK_ENTRIES: usize = 50_000;
/// Cap on the estimated size of a walk outcome: the running total of item content
/// bytes plus per-item overhead may not exceed this value.
const MAX_WALK_RESPONSE_BYTES: usize = 4 * 1024 * 1024;
/// Fixed byte allowance added on top of each entry's or error's content bytes when
/// estimating the response size.
const WALK_RESPONSE_ITEM_OVERHEAD_BYTES: usize = 64;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct CreateDirectoryOptions {
@@ -56,6 +62,51 @@ pub struct ReadDirectoryEntry {
pub is_file: bool,
}
/// Bounds for a recursive filesystem walk.
///
/// The shared `walk` implementation rejects options with an `InvalidInput` error
/// when `max_directories` or `max_entries` is zero, or when any field exceeds its
/// corresponding `MAX_WALK_*` constant.
#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WalkOptions {
/// Maximum directory depth below the root that may be traversed.
///
/// With a value of 0 the root's own entries are listed, but no subdirectory is entered.
pub max_depth: usize,
/// Maximum number of directories that may be traversed, including the root.
pub max_directories: usize,
/// Maximum number of directory entries that may be examined.
///
/// Every examined entry counts, including symlinks and entries whose metadata
/// lookup fails.
pub max_entries: usize,
}
/// Type of a filesystem entry returned by a walk.
#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub enum WalkEntryKind {
/// The entry's metadata reported a directory.
Directory,
/// The entry's metadata reported a file (and not a directory).
File,
}
/// One non-symlink entry returned by a walk.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WalkEntry {
/// Path of the entry, formed by joining the containing directory with the entry's file name.
pub path: PathUri,
/// Whether the entry is a directory or a file.
pub kind: WalkEntryKind,
}
/// A descendant that could not be inspected during a walk.
///
/// The shared `walk` implementation also records a directory here when reading
/// that directory fails, or when one of its entry names cannot be joined onto it.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WalkError {
/// Path the failure is attributed to.
pub path: PathUri,
/// Text of the underlying error.
pub message: String,
}
/// Entries and recoverable errors collected by a bounded walk.
#[derive(Clone, Debug, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WalkOutcome {
/// Directories and files found, in the order the walk visited them.
pub entries: Vec<WalkEntry>,
/// Paths that could not be read or inspected, with the error text for each.
pub errors: Vec<WalkError>,
/// Set when the entry limit, the directory limit, or the response-size cap caused
/// results to be left out. Stopping at the depth limit alone does not set it.
pub truncated: bool,
}
#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileSystemSandboxContext {
@@ -249,6 +300,16 @@ pub trait ExecutorFileSystem: Send + Sync {
sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, Vec<ReadDirectoryEntry>>;
/// Recursively lists descendants, skipping symlinks.
///
/// The default implementation boxes the shared free-standing `walk` helper, which
/// drives the traversal through this filesystem's `read_directory` and
/// `get_metadata` operations.
fn walk<'a>(
    &'a self,
    path: &'a PathUri,
    options: WalkOptions,
    sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
    let future = walk(self, path, options, sandbox);
    Box::pin(future)
}
fn remove<'a>(
&'a self,
path: &'a PathUri,
@@ -264,3 +325,155 @@ pub trait ExecutorFileSystem: Send + Sync {
sandbox: Option<&'a FileSystemSandboxContext>,
) -> ExecutorFileSystemFuture<'a, ()>;
}
async fn walk<F: ExecutorFileSystem + ?Sized>(
file_system: &F,
root: &PathUri,
options: WalkOptions,
sandbox: Option<&FileSystemSandboxContext>,
) -> FileSystemResult<WalkOutcome> {
if options.max_directories == 0 || options.max_entries == 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"filesystem walk limits must be greater than zero",
));
}
if options.max_depth > MAX_WALK_DEPTH
|| options.max_directories > MAX_WALK_DIRECTORIES
|| options.max_entries > MAX_WALK_ENTRIES
{
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!(
"filesystem walk limits exceed maximums: depth={MAX_WALK_DEPTH}, directories={MAX_WALK_DIRECTORIES}, entries={MAX_WALK_ENTRIES}"
),
));
}
let root_metadata = file_system.get_metadata(root, sandbox).await?;
if root_metadata.is_symlink || !root_metadata.is_directory {
return Ok(WalkOutcome::default());
}
let mut outcome = WalkOutcome::default();
let mut queue = VecDeque::from([(root.clone(), 0usize)]);
let mut directory_count = 1usize;
let mut entry_count = 0usize;
let mut response_bytes = 0usize;
while let Some((directory, depth)) = queue.pop_front() {
let mut entries = match file_system.read_directory(&directory, sandbox).await {
Ok(entries) => entries,
Err(error) => {
if !push_walk_error(
&mut outcome,
&mut response_bytes,
directory,
error.to_string(),
) {
return Ok(outcome);
}
continue;
}
};
entries.sort_by(|left, right| left.file_name.cmp(&right.file_name));
for entry in entries {
if entry_count == options.max_entries {
outcome.truncated = true;
return Ok(outcome);
}
entry_count += 1;
let path = match directory.join(&entry.file_name) {
Ok(path) => path,
Err(error) => {
if !push_walk_error(
&mut outcome,
&mut response_bytes,
directory.clone(),
error.to_string(),
) {
return Ok(outcome);
}
continue;
}
};
let metadata = match file_system.get_metadata(&path, sandbox).await {
Ok(metadata) => metadata,
Err(error) => {
if !push_walk_error(&mut outcome, &mut response_bytes, path, error.to_string())
{
return Ok(outcome);
}
continue;
}
};
if metadata.is_symlink {
continue;
}
let kind = if metadata.is_directory {
WalkEntryKind::Directory
} else if metadata.is_file {
WalkEntryKind::File
} else {
continue;
};
if !reserve_walk_response_bytes(
&mut outcome,
&mut response_bytes,
path.to_string().len(),
) {
return Ok(outcome);
}
outcome.entries.push(WalkEntry {
path: path.clone(),
kind,
});
if kind == WalkEntryKind::Directory && depth < options.max_depth {
if directory_count == options.max_directories {
outcome.truncated = true;
} else {
directory_count += 1;
queue.push_back((path, depth + 1));
}
}
}
}
Ok(outcome)
}
/// Records a recoverable walk error if the response-size budget allows it.
///
/// The error's content size is the length of the path's string form plus the
/// length of `message`. Returns `true` when the error was appended. Returns
/// `false`, with `outcome.truncated` set by the reservation step, when the budget
/// is exhausted and the error was dropped.
fn push_walk_error(
    outcome: &mut WalkOutcome,
    response_bytes: &mut usize,
    path: PathUri,
    message: String,
) -> bool {
    let content_bytes = path.to_string().len().saturating_add(message.len());
    let reserved = reserve_walk_response_bytes(outcome, response_bytes, content_bytes);
    if reserved {
        outcome.errors.push(WalkError { path, message });
    }
    reserved
}
/// Charges one response item against the walk's response-size budget.
///
/// The item costs `content_bytes` plus `WALK_RESPONSE_ITEM_OVERHEAD_BYTES`. When
/// the new running total fits within `MAX_WALK_RESPONSE_BYTES`, it is stored in
/// `response_bytes` and `true` is returned. Otherwise, including when the addition
/// overflows, `response_bytes` is left unchanged, `outcome.truncated` is set, and
/// `false` is returned.
fn reserve_walk_response_bytes(
    outcome: &mut WalkOutcome,
    response_bytes: &mut usize,
    content_bytes: usize,
) -> bool {
    let item_bytes = content_bytes.saturating_add(WALK_RESPONSE_ITEM_OVERHEAD_BYTES);
    match response_bytes.checked_add(item_bytes) {
        Some(total_bytes) if total_bytes <= MAX_WALK_RESPONSE_BYTES => {
            *response_bytes = total_bytes;
            true
        }
        _ => {
            outcome.truncated = true;
            false
        }
    }
}