mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
Add a bounded filesystem walk RPC (#29841)
Stack 1 of 3. Follow-ups: #29842 and #29844.

## What changes

Adds a general bounded `fs/walk` operation to the exec server. The operation returns file and directory entries plus recoverable per-path errors. It skips symlinks, preserves the existing filesystem sandbox routing, and enforces depth, directory, entry, and response-size limits.

This PR only defines and wires the filesystem operation. It does not change any callers yet.
This commit is contained in:
@@ -2,6 +2,8 @@ use std::collections::HashMap;
|
||||
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_file_system::FileSystemSandboxContext;
|
||||
pub use codex_file_system::WalkOptions;
|
||||
pub use codex_file_system::WalkOutcome;
|
||||
use codex_network_proxy::ManagedNetworkSandboxContext;
|
||||
use codex_protocol::config_types::ShellEnvironmentPolicyInherit;
|
||||
use codex_shell_command::shell_detect::DetectedShell;
|
||||
@@ -31,6 +33,7 @@ pub const FS_CREATE_DIRECTORY_METHOD: &str = "fs/createDirectory";
|
||||
pub const FS_GET_METADATA_METHOD: &str = "fs/getMetadata";
|
||||
pub const FS_CANONICALIZE_METHOD: &str = "fs/canonicalize";
|
||||
pub const FS_READ_DIRECTORY_METHOD: &str = "fs/readDirectory";
|
||||
pub const FS_WALK_METHOD: &str = "fs/walk";
|
||||
pub const FS_REMOVE_METHOD: &str = "fs/remove";
|
||||
pub const FS_COPY_METHOD: &str = "fs/copy";
|
||||
/// JSON-RPC request method for executor-side HTTP requests.
|
||||
@@ -370,6 +373,16 @@ pub struct FsReadDirectoryResponse {
|
||||
pub entries: Vec<FsReadDirectoryEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FsWalkParams {
|
||||
pub path: PathUri,
|
||||
pub options: WalkOptions,
|
||||
pub sandbox: Option<FileSystemSandboxContext>,
|
||||
}
|
||||
|
||||
/// Response payload of the `fs/walk` RPC; an alias for [`WalkOutcome`].
pub type FsWalkResponse = WalkOutcome;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FsRemoveParams {
|
||||
|
||||
@@ -58,6 +58,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD;
|
||||
use crate::protocol::FS_READ_DIRECTORY_METHOD;
|
||||
use crate::protocol::FS_READ_FILE_METHOD;
|
||||
use crate::protocol::FS_REMOVE_METHOD;
|
||||
use crate::protocol::FS_WALK_METHOD;
|
||||
use crate::protocol::FS_WRITE_FILE_METHOD;
|
||||
use crate::protocol::FsCanonicalizeParams;
|
||||
use crate::protocol::FsCanonicalizeResponse;
|
||||
@@ -79,6 +80,8 @@ use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsReadFileResponse;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsRemoveResponse;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWalkResponse;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
use crate::protocol::FsWriteFileResponse;
|
||||
use crate::protocol::HTTP_REQUEST_BODY_DELTA_METHOD;
|
||||
@@ -620,6 +623,10 @@ impl ExecServerClient {
|
||||
self.call(FS_READ_DIRECTORY_METHOD, ¶ms).await
|
||||
}
|
||||
|
||||
pub async fn fs_walk(&self, params: FsWalkParams) -> Result<FsWalkResponse, ExecServerError> {
|
||||
self.call(FS_WALK_METHOD, ¶ms).await
|
||||
}
|
||||
|
||||
pub async fn fs_remove(
|
||||
&self,
|
||||
params: FsRemoveParams,
|
||||
|
||||
@@ -17,6 +17,7 @@ use crate::protocol::FS_GET_METADATA_METHOD;
|
||||
use crate::protocol::FS_READ_DIRECTORY_METHOD;
|
||||
use crate::protocol::FS_READ_FILE_METHOD;
|
||||
use crate::protocol::FS_REMOVE_METHOD;
|
||||
use crate::protocol::FS_WALK_METHOD;
|
||||
use crate::protocol::FS_WRITE_FILE_METHOD;
|
||||
use crate::protocol::FsCanonicalizeParams;
|
||||
use crate::protocol::FsCanonicalizeResponse;
|
||||
@@ -33,6 +34,8 @@ use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsReadFileResponse;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsRemoveResponse;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWalkResponse;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
use crate::protocol::FsWriteFileResponse;
|
||||
use crate::rpc::internal_error;
|
||||
@@ -56,6 +59,8 @@ pub(crate) enum FsHelperRequest {
|
||||
Canonicalize(FsCanonicalizeParams),
|
||||
#[serde(rename = "fs/readDirectory")]
|
||||
ReadDirectory(FsReadDirectoryParams),
|
||||
#[serde(rename = "fs/walk")]
|
||||
Walk(FsWalkParams),
|
||||
#[serde(rename = "fs/remove")]
|
||||
Remove(FsRemoveParams),
|
||||
#[serde(rename = "fs/copy")]
|
||||
@@ -84,6 +89,8 @@ pub(crate) enum FsHelperPayload {
|
||||
Canonicalize(FsCanonicalizeResponse),
|
||||
#[serde(rename = "fs/readDirectory")]
|
||||
ReadDirectory(FsReadDirectoryResponse),
|
||||
#[serde(rename = "fs/walk")]
|
||||
Walk(FsWalkResponse),
|
||||
#[serde(rename = "fs/remove")]
|
||||
Remove(FsRemoveResponse),
|
||||
#[serde(rename = "fs/copy")]
|
||||
@@ -99,6 +106,7 @@ impl FsHelperPayload {
|
||||
Self::GetMetadata(_) => FS_GET_METADATA_METHOD,
|
||||
Self::Canonicalize(_) => FS_CANONICALIZE_METHOD,
|
||||
Self::ReadDirectory(_) => FS_READ_DIRECTORY_METHOD,
|
||||
Self::Walk(_) => FS_WALK_METHOD,
|
||||
Self::Remove(_) => FS_REMOVE_METHOD,
|
||||
Self::Copy(_) => FS_COPY_METHOD,
|
||||
}
|
||||
@@ -162,6 +170,13 @@ impl FsHelperPayload {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn expect_walk(self) -> Result<FsWalkResponse, JSONRPCErrorError> {
|
||||
match self {
|
||||
Self::Walk(response) => Ok(response),
|
||||
other => Err(unexpected_response(FS_WALK_METHOD, other.operation())),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn expect_remove(self) -> Result<FsRemoveResponse, JSONRPCErrorError> {
|
||||
match self {
|
||||
Self::Remove(response) => Ok(response),
|
||||
@@ -263,6 +278,13 @@ pub(crate) async fn run_direct_request(
|
||||
entries,
|
||||
}))
|
||||
}
|
||||
FsHelperRequest::Walk(params) => {
|
||||
let outcome = file_system
|
||||
.walk(¶ms.path, params.options, /*sandbox*/ None)
|
||||
.await
|
||||
.map_err(map_fs_error)?;
|
||||
Ok(FsHelperPayload::Walk(outcome))
|
||||
}
|
||||
FsHelperRequest::Remove(params) => {
|
||||
file_system
|
||||
.remove(
|
||||
|
||||
@@ -51,6 +51,11 @@ pub use codex_file_system::FileSystemResult;
|
||||
pub use codex_file_system::FileSystemSandboxContext;
|
||||
pub use codex_file_system::ReadDirectoryEntry;
|
||||
pub use codex_file_system::RemoveOptions;
|
||||
pub use codex_file_system::WalkEntry;
|
||||
pub use codex_file_system::WalkEntryKind;
|
||||
pub use codex_file_system::WalkError;
|
||||
pub use codex_file_system::WalkOptions;
|
||||
pub use codex_file_system::WalkOutcome;
|
||||
pub use environment::CODEX_EXEC_SERVER_NOISE_AUTH_TOKEN_ENV_VAR;
|
||||
pub use environment::CODEX_EXEC_SERVER_NOISE_CHATGPT_ACCOUNT_ID_ENV_VAR;
|
||||
pub use environment::CODEX_EXEC_SERVER_NOISE_ENVIRONMENT_ID_ENV_VAR;
|
||||
@@ -113,6 +118,8 @@ pub use protocol::FsReadFileParams;
|
||||
pub use protocol::FsReadFileResponse;
|
||||
pub use protocol::FsRemoveParams;
|
||||
pub use protocol::FsRemoveResponse;
|
||||
pub use protocol::FsWalkParams;
|
||||
pub use protocol::FsWalkResponse;
|
||||
pub use protocol::FsWriteFileParams;
|
||||
pub use protocol::FsWriteFileResponse;
|
||||
pub use protocol::HttpHeader;
|
||||
|
||||
@@ -22,6 +22,8 @@ use crate::FileSystemResult;
|
||||
use crate::FileSystemSandboxContext;
|
||||
use crate::ReadDirectoryEntry;
|
||||
use crate::RemoveOptions;
|
||||
use crate::WalkOptions;
|
||||
use crate::WalkOutcome;
|
||||
use crate::regular_file;
|
||||
use crate::sandboxed_file_system::SandboxedFileSystem;
|
||||
|
||||
@@ -170,6 +172,16 @@ impl LocalFileSystem {
|
||||
file_system.read_directory(path, sandbox).await
|
||||
}
|
||||
|
||||
async fn walk(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&FileSystemSandboxContext>,
|
||||
) -> FileSystemResult<WalkOutcome> {
|
||||
let (file_system, sandbox) = self.file_system_for(sandbox)?;
|
||||
file_system.walk(path, options, sandbox).await
|
||||
}
|
||||
|
||||
async fn remove(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
@@ -255,6 +267,15 @@ impl ExecutorFileSystem for LocalFileSystem {
|
||||
Box::pin(LocalFileSystem::read_directory(self, path, sandbox))
|
||||
}
|
||||
|
||||
fn walk<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
|
||||
Box::pin(LocalFileSystem::walk(self, path, options, sandbox))
|
||||
}
|
||||
|
||||
fn remove<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
|
||||
@@ -15,6 +15,8 @@ use crate::FileSystemResult;
|
||||
use crate::FileSystemSandboxContext;
|
||||
use crate::ReadDirectoryEntry;
|
||||
use crate::RemoveOptions;
|
||||
use crate::WalkOptions;
|
||||
use crate::WalkOutcome;
|
||||
use crate::client::LazyRemoteExecServerClient;
|
||||
use crate::protocol::FsCanonicalizeParams;
|
||||
use crate::protocol::FsCopyParams;
|
||||
@@ -23,6 +25,7 @@ use crate::protocol::FsGetMetadataParams;
|
||||
use crate::protocol::FsReadDirectoryParams;
|
||||
use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
|
||||
const INVALID_REQUEST_ERROR_CODE: i64 = -32600;
|
||||
@@ -183,6 +186,25 @@ impl RemoteFileSystem {
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn walk(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&FileSystemSandboxContext>,
|
||||
) -> FileSystemResult<WalkOutcome> {
|
||||
trace!("remote fs walk");
|
||||
let client = self.client.get().await.map_err(map_remote_error)?;
|
||||
let response = client
|
||||
.fs_walk(FsWalkParams {
|
||||
path: path.clone(),
|
||||
options,
|
||||
sandbox: remote_sandbox_context(sandbox),
|
||||
})
|
||||
.await
|
||||
.map_err(map_remote_error)?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn remove(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
@@ -286,6 +308,15 @@ impl ExecutorFileSystem for RemoteFileSystem {
|
||||
Box::pin(RemoteFileSystem::read_directory(self, path, sandbox))
|
||||
}
|
||||
|
||||
fn walk<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
|
||||
Box::pin(RemoteFileSystem::walk(self, path, options, sandbox))
|
||||
}
|
||||
|
||||
fn remove<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
|
||||
@@ -15,6 +15,8 @@ use crate::FileSystemResult;
|
||||
use crate::FileSystemSandboxContext;
|
||||
use crate::ReadDirectoryEntry;
|
||||
use crate::RemoveOptions;
|
||||
use crate::WalkOptions;
|
||||
use crate::WalkOutcome;
|
||||
use crate::fs_helper::FsHelperPayload;
|
||||
use crate::fs_helper::FsHelperRequest;
|
||||
use crate::fs_sandbox::FileSystemSandboxRunner;
|
||||
@@ -25,6 +27,7 @@ use crate::protocol::FsGetMetadataParams;
|
||||
use crate::protocol::FsReadDirectoryParams;
|
||||
use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -200,6 +203,29 @@ impl SandboxedFileSystem {
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn walk(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&FileSystemSandboxContext>,
|
||||
) -> FileSystemResult<WalkOutcome> {
|
||||
let sandbox = require_platform_sandbox(sandbox)?;
|
||||
validate_native_path(path)?;
|
||||
let response = self
|
||||
.run_sandboxed(
|
||||
sandbox,
|
||||
FsHelperRequest::Walk(FsWalkParams {
|
||||
path: path.clone(),
|
||||
options,
|
||||
sandbox: None,
|
||||
}),
|
||||
)
|
||||
.await?
|
||||
.expect_walk()
|
||||
.map_err(map_sandbox_error)?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn remove(
|
||||
&self,
|
||||
path: &PathUri,
|
||||
@@ -317,6 +343,15 @@ impl ExecutorFileSystem for SandboxedFileSystem {
|
||||
Box::pin(SandboxedFileSystem::read_directory(self, path, sandbox))
|
||||
}
|
||||
|
||||
fn walk<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
|
||||
Box::pin(SandboxedFileSystem::walk(self, path, options, sandbox))
|
||||
}
|
||||
|
||||
fn remove<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
|
||||
@@ -33,6 +33,8 @@ use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsReadFileResponse;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsRemoveResponse;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWalkResponse;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
use crate::protocol::FsWriteFileResponse;
|
||||
use crate::rpc::internal_error;
|
||||
@@ -198,6 +200,16 @@ impl FileSystemHandler {
|
||||
Ok(FsReadDirectoryResponse { entries })
|
||||
}
|
||||
|
||||
pub(crate) async fn walk(
|
||||
&self,
|
||||
params: FsWalkParams,
|
||||
) -> Result<FsWalkResponse, JSONRPCErrorError> {
|
||||
self.file_system
|
||||
.walk(¶ms.path, params.options, params.sandbox.as_ref())
|
||||
.await
|
||||
.map_err(map_fs_error)
|
||||
}
|
||||
|
||||
pub(crate) async fn remove(
|
||||
&self,
|
||||
params: FsRemoveParams,
|
||||
|
||||
@@ -37,6 +37,8 @@ use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsReadFileResponse;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsRemoveResponse;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWalkResponse;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
use crate::protocol::FsWriteFileResponse;
|
||||
use crate::protocol::HttpRequestParams;
|
||||
@@ -311,6 +313,14 @@ impl ExecServerHandler {
|
||||
self.file_system.read_directory(params).await
|
||||
}
|
||||
|
||||
pub(crate) async fn fs_walk(
|
||||
&self,
|
||||
params: FsWalkParams,
|
||||
) -> Result<FsWalkResponse, JSONRPCErrorError> {
|
||||
self.require_initialized_for("filesystem")?;
|
||||
self.file_system.walk(params).await
|
||||
}
|
||||
|
||||
pub(crate) async fn fs_remove(
|
||||
&self,
|
||||
params: FsRemoveParams,
|
||||
|
||||
@@ -17,6 +17,7 @@ use crate::protocol::FS_READ_BLOCK_METHOD;
|
||||
use crate::protocol::FS_READ_DIRECTORY_METHOD;
|
||||
use crate::protocol::FS_READ_FILE_METHOD;
|
||||
use crate::protocol::FS_REMOVE_METHOD;
|
||||
use crate::protocol::FS_WALK_METHOD;
|
||||
use crate::protocol::FS_WRITE_FILE_METHOD;
|
||||
use crate::protocol::FsCanonicalizeParams;
|
||||
use crate::protocol::FsCloseParams;
|
||||
@@ -28,6 +29,7 @@ use crate::protocol::FsReadBlockParams;
|
||||
use crate::protocol::FsReadDirectoryParams;
|
||||
use crate::protocol::FsReadFileParams;
|
||||
use crate::protocol::FsRemoveParams;
|
||||
use crate::protocol::FsWalkParams;
|
||||
use crate::protocol::FsWriteFileParams;
|
||||
use crate::protocol::HTTP_REQUEST_METHOD;
|
||||
use crate::protocol::HttpRequestParams;
|
||||
@@ -147,6 +149,12 @@ pub(crate) fn build_router() -> RpcRouter<ExecServerHandler> {
|
||||
handler.fs_read_directory(params).await
|
||||
},
|
||||
);
|
||||
router.request(
|
||||
FS_WALK_METHOD,
|
||||
|handler: Arc<ExecServerHandler>, params: FsWalkParams| async move {
|
||||
handler.fs_walk(params).await
|
||||
},
|
||||
);
|
||||
router.request(
|
||||
FS_REMOVE_METHOD,
|
||||
|handler: Arc<ExecServerHandler>, params: FsRemoveParams| async move {
|
||||
|
||||
@@ -6,6 +6,10 @@ use codex_exec_server::FILE_READ_CHUNK_SIZE;
|
||||
use codex_exec_server::FileMetadata;
|
||||
use codex_exec_server::ReadDirectoryEntry;
|
||||
use codex_exec_server::RemoveOptions;
|
||||
use codex_exec_server::WalkEntry;
|
||||
use codex_exec_server::WalkEntryKind;
|
||||
use codex_exec_server::WalkOptions;
|
||||
use codex_exec_server::WalkOutcome;
|
||||
use codex_protocol::models::AdditionalPermissionProfile;
|
||||
use codex_protocol::models::FileSystemPermissions;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
@@ -373,6 +377,179 @@ async fn file_system_read_directory_lists_entries(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(FileSystemImplementation::Local ; "local")]
|
||||
#[test_case(FileSystemImplementation::Remote ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn file_system_walk_returns_a_bounded_tree(
|
||||
implementation: FileSystemImplementation,
|
||||
) -> Result<()> {
|
||||
let context = create_file_system_context(implementation).await?;
|
||||
let file_system = context.file_system;
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let source_dir = tmp.path().join("source");
|
||||
let nested_dir = source_dir.join("nested");
|
||||
std::fs::create_dir_all(&nested_dir)?;
|
||||
std::fs::write(source_dir.join("root.txt"), "root")?;
|
||||
std::fs::write(nested_dir.join("note.txt"), "nested")?;
|
||||
|
||||
let source_uri = PathUri::from_host_native_path(&source_dir)?;
|
||||
let outcome = file_system
|
||||
.walk(
|
||||
&source_uri,
|
||||
WalkOptions {
|
||||
max_depth: 4,
|
||||
max_directories: 10,
|
||||
max_entries: 10,
|
||||
},
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
outcome,
|
||||
WalkOutcome {
|
||||
entries: vec![
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(&nested_dir)?,
|
||||
kind: WalkEntryKind::Directory,
|
||||
},
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(source_dir.join("root.txt"))?,
|
||||
kind: WalkEntryKind::File,
|
||||
},
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(nested_dir.join("note.txt"))?,
|
||||
kind: WalkEntryKind::File,
|
||||
},
|
||||
],
|
||||
errors: Vec::new(),
|
||||
truncated: false,
|
||||
}
|
||||
);
|
||||
|
||||
let root_entries = vec![
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(&nested_dir)?,
|
||||
kind: WalkEntryKind::Directory,
|
||||
},
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(source_dir.join("root.txt"))?,
|
||||
kind: WalkEntryKind::File,
|
||||
},
|
||||
];
|
||||
let shallow = file_system
|
||||
.walk(
|
||||
&source_uri,
|
||||
WalkOptions {
|
||||
max_depth: 0,
|
||||
max_directories: 10,
|
||||
max_entries: 10,
|
||||
},
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
shallow,
|
||||
WalkOutcome {
|
||||
entries: root_entries.clone(),
|
||||
errors: Vec::new(),
|
||||
truncated: false,
|
||||
}
|
||||
);
|
||||
|
||||
let directory_bounded = file_system
|
||||
.walk(
|
||||
&source_uri,
|
||||
WalkOptions {
|
||||
max_depth: 4,
|
||||
max_directories: 1,
|
||||
max_entries: 10,
|
||||
},
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
directory_bounded,
|
||||
WalkOutcome {
|
||||
entries: root_entries,
|
||||
errors: Vec::new(),
|
||||
truncated: true,
|
||||
}
|
||||
);
|
||||
|
||||
let bounded = file_system
|
||||
.walk(
|
||||
&source_uri,
|
||||
WalkOptions {
|
||||
max_depth: 4,
|
||||
max_directories: 10,
|
||||
max_entries: 1,
|
||||
},
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
bounded,
|
||||
WalkOutcome {
|
||||
entries: vec![WalkEntry {
|
||||
path: PathUri::from_host_native_path(&nested_dir)?,
|
||||
kind: WalkEntryKind::Directory,
|
||||
}],
|
||||
errors: Vec::new(),
|
||||
truncated: true,
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(FileSystemImplementation::Local ; "local")]
|
||||
#[test_case(FileSystemImplementation::Remote ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn file_system_walk_honors_read_sandbox(
|
||||
implementation: FileSystemImplementation,
|
||||
) -> Result<()> {
|
||||
let context = create_file_system_context(implementation).await?;
|
||||
let file_system = context.file_system;
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let source_dir = tmp.path().join("source");
|
||||
let file_path = source_dir.join("note.txt");
|
||||
std::fs::create_dir_all(&source_dir)?;
|
||||
std::fs::write(&file_path, "sandboxed")?;
|
||||
let sandbox = read_only_sandbox(source_dir.clone());
|
||||
|
||||
let outcome = file_system
|
||||
.walk(
|
||||
&PathUri::from_host_native_path(&source_dir)?,
|
||||
WalkOptions {
|
||||
max_depth: 1,
|
||||
max_directories: 2,
|
||||
max_entries: 2,
|
||||
},
|
||||
Some(&sandbox),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
outcome,
|
||||
WalkOutcome {
|
||||
entries: vec![WalkEntry {
|
||||
path: PathUri::from_host_native_path(file_path)?,
|
||||
kind: WalkEntryKind::File,
|
||||
}],
|
||||
errors: Vec::new(),
|
||||
truncated: false,
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(FileSystemImplementation::Local ; "local")]
|
||||
#[test_case(FileSystemImplementation::Remote ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
|
||||
@@ -24,6 +24,10 @@ use codex_exec_server::CreateDirectoryOptions;
|
||||
use codex_exec_server::Environment;
|
||||
use codex_exec_server::FileMetadata;
|
||||
use codex_exec_server::RemoveOptions;
|
||||
use codex_exec_server::WalkEntry;
|
||||
use codex_exec_server::WalkEntryKind;
|
||||
use codex_exec_server::WalkOptions;
|
||||
use codex_exec_server::WalkOutcome;
|
||||
use codex_utils_path_uri::PathUri;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
@@ -266,6 +270,54 @@ async fn file_system_get_metadata_reports_symlink_targets(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(FileSystemImplementation::Local ; "local")]
|
||||
#[test_case(FileSystemImplementation::Remote ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn file_system_walk_ignores_symlinks(implementation: FileSystemImplementation) -> Result<()> {
|
||||
let context = create_file_system_context(implementation).await?;
|
||||
let file_system = context.file_system;
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let root = tmp.path().join("root");
|
||||
let target = root.join("target");
|
||||
let target_file = target.join("note.txt");
|
||||
std::fs::create_dir_all(&target)?;
|
||||
std::fs::write(&target_file, "target")?;
|
||||
symlink(&target, root.join("target-link"))?;
|
||||
|
||||
let outcome = file_system
|
||||
.walk(
|
||||
&PathUri::from_host_native_path(&root)?,
|
||||
WalkOptions {
|
||||
max_depth: 2,
|
||||
max_directories: 4,
|
||||
max_entries: 8,
|
||||
},
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("mode={implementation}"))?;
|
||||
assert_eq!(
|
||||
outcome,
|
||||
WalkOutcome {
|
||||
entries: vec![
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(&target)?,
|
||||
kind: WalkEntryKind::Directory,
|
||||
},
|
||||
WalkEntry {
|
||||
path: PathUri::from_host_native_path(target_file)?,
|
||||
kind: WalkEntryKind::File,
|
||||
},
|
||||
],
|
||||
errors: Vec::new(),
|
||||
truncated: false,
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(FileSystemImplementation::Local ; "local")]
|
||||
#[test_case(FileSystemImplementation::Remote ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
|
||||
@@ -12,6 +12,7 @@ use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use codex_utils_path_uri::PathUri;
|
||||
use futures::Stream;
|
||||
use std::collections::VecDeque;
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
@@ -21,6 +22,11 @@ use std::task::Poll;
|
||||
|
||||
/// Maximum chunk size returned by [`ExecutorFileSystem::read_file_stream`].
|
||||
pub const FILE_READ_CHUNK_SIZE: usize = 1024 * 1024;
|
||||
/// Largest `max_depth` a walk request may ask for.
const MAX_WALK_DEPTH: usize = 64;
/// Largest `max_directories` a walk request may ask for.
const MAX_WALK_DIRECTORIES: usize = 10_000;
/// Largest `max_entries` a walk request may ask for.
const MAX_WALK_ENTRIES: usize = 50_000;
/// Budget for the estimated size of a walk response (4 MiB).
const MAX_WALK_RESPONSE_BYTES: usize = 4 * 1024 * 1024;
/// Fixed allowance charged per returned entry or error on top of its content length.
const WALK_RESPONSE_ITEM_OVERHEAD_BYTES: usize = 64;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub struct CreateDirectoryOptions {
|
||||
@@ -56,6 +62,51 @@ pub struct ReadDirectoryEntry {
|
||||
pub is_file: bool,
|
||||
}
|
||||
|
||||
/// Bounds for a recursive filesystem walk.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct WalkOptions {
|
||||
/// Maximum directory depth below the root that may be traversed.
|
||||
pub max_depth: usize,
|
||||
/// Maximum number of directories that may be traversed, including the root.
|
||||
pub max_directories: usize,
|
||||
/// Maximum number of directory entries that may be examined.
|
||||
pub max_entries: usize,
|
||||
}
|
||||
|
||||
/// Type of a filesystem entry returned by a walk.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum WalkEntryKind {
|
||||
Directory,
|
||||
File,
|
||||
}
|
||||
|
||||
/// One non-symlink entry returned by a walk.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct WalkEntry {
|
||||
pub path: PathUri,
|
||||
pub kind: WalkEntryKind,
|
||||
}
|
||||
|
||||
/// A descendant that could not be inspected during a walk.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct WalkError {
|
||||
pub path: PathUri,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Entries and recoverable errors collected by a bounded walk.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct WalkOutcome {
|
||||
pub entries: Vec<WalkEntry>,
|
||||
pub errors: Vec<WalkError>,
|
||||
pub truncated: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FileSystemSandboxContext {
|
||||
@@ -249,6 +300,16 @@ pub trait ExecutorFileSystem: Send + Sync {
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, Vec<ReadDirectoryEntry>>;
|
||||
|
||||
/// Recursively lists descendants, skipping symlinks.
|
||||
fn walk<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
|
||||
Box::pin(walk(self, path, options, sandbox))
|
||||
}
|
||||
|
||||
fn remove<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
@@ -264,3 +325,155 @@ pub trait ExecutorFileSystem: Send + Sync {
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, ()>;
|
||||
}
|
||||
|
||||
async fn walk<F: ExecutorFileSystem + ?Sized>(
|
||||
file_system: &F,
|
||||
root: &PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&FileSystemSandboxContext>,
|
||||
) -> FileSystemResult<WalkOutcome> {
|
||||
if options.max_directories == 0 || options.max_entries == 0 {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"filesystem walk limits must be greater than zero",
|
||||
));
|
||||
}
|
||||
if options.max_depth > MAX_WALK_DEPTH
|
||||
|| options.max_directories > MAX_WALK_DIRECTORIES
|
||||
|| options.max_entries > MAX_WALK_ENTRIES
|
||||
{
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"filesystem walk limits exceed maximums: depth={MAX_WALK_DEPTH}, directories={MAX_WALK_DIRECTORIES}, entries={MAX_WALK_ENTRIES}"
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let root_metadata = file_system.get_metadata(root, sandbox).await?;
|
||||
if root_metadata.is_symlink || !root_metadata.is_directory {
|
||||
return Ok(WalkOutcome::default());
|
||||
}
|
||||
|
||||
let mut outcome = WalkOutcome::default();
|
||||
let mut queue = VecDeque::from([(root.clone(), 0usize)]);
|
||||
let mut directory_count = 1usize;
|
||||
let mut entry_count = 0usize;
|
||||
let mut response_bytes = 0usize;
|
||||
|
||||
while let Some((directory, depth)) = queue.pop_front() {
|
||||
let mut entries = match file_system.read_directory(&directory, sandbox).await {
|
||||
Ok(entries) => entries,
|
||||
Err(error) => {
|
||||
if !push_walk_error(
|
||||
&mut outcome,
|
||||
&mut response_bytes,
|
||||
directory,
|
||||
error.to_string(),
|
||||
) {
|
||||
return Ok(outcome);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
entries.sort_by(|left, right| left.file_name.cmp(&right.file_name));
|
||||
|
||||
for entry in entries {
|
||||
if entry_count == options.max_entries {
|
||||
outcome.truncated = true;
|
||||
return Ok(outcome);
|
||||
}
|
||||
entry_count += 1;
|
||||
|
||||
let path = match directory.join(&entry.file_name) {
|
||||
Ok(path) => path,
|
||||
Err(error) => {
|
||||
if !push_walk_error(
|
||||
&mut outcome,
|
||||
&mut response_bytes,
|
||||
directory.clone(),
|
||||
error.to_string(),
|
||||
) {
|
||||
return Ok(outcome);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let metadata = match file_system.get_metadata(&path, sandbox).await {
|
||||
Ok(metadata) => metadata,
|
||||
Err(error) => {
|
||||
if !push_walk_error(&mut outcome, &mut response_bytes, path, error.to_string())
|
||||
{
|
||||
return Ok(outcome);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if metadata.is_symlink {
|
||||
continue;
|
||||
}
|
||||
|
||||
let kind = if metadata.is_directory {
|
||||
WalkEntryKind::Directory
|
||||
} else if metadata.is_file {
|
||||
WalkEntryKind::File
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
if !reserve_walk_response_bytes(
|
||||
&mut outcome,
|
||||
&mut response_bytes,
|
||||
path.to_string().len(),
|
||||
) {
|
||||
return Ok(outcome);
|
||||
}
|
||||
outcome.entries.push(WalkEntry {
|
||||
path: path.clone(),
|
||||
kind,
|
||||
});
|
||||
|
||||
if kind == WalkEntryKind::Directory && depth < options.max_depth {
|
||||
if directory_count == options.max_directories {
|
||||
outcome.truncated = true;
|
||||
} else {
|
||||
directory_count += 1;
|
||||
queue.push_back((path, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(outcome)
|
||||
}
|
||||
|
||||
fn push_walk_error(
|
||||
outcome: &mut WalkOutcome,
|
||||
response_bytes: &mut usize,
|
||||
path: PathUri,
|
||||
message: String,
|
||||
) -> bool {
|
||||
let item_bytes = path.to_string().len().saturating_add(message.len());
|
||||
if !reserve_walk_response_bytes(outcome, response_bytes, item_bytes) {
|
||||
return false;
|
||||
}
|
||||
outcome.errors.push(WalkError { path, message });
|
||||
true
|
||||
}
|
||||
|
||||
fn reserve_walk_response_bytes(
|
||||
outcome: &mut WalkOutcome,
|
||||
response_bytes: &mut usize,
|
||||
content_bytes: usize,
|
||||
) -> bool {
|
||||
let item_bytes = content_bytes.saturating_add(WALK_RESPONSE_ITEM_OVERHEAD_BYTES);
|
||||
let Some(total_bytes) = response_bytes.checked_add(item_bytes) else {
|
||||
outcome.truncated = true;
|
||||
return false;
|
||||
};
|
||||
if total_bytes > MAX_WALK_RESPONSE_BYTES {
|
||||
outcome.truncated = true;
|
||||
return false;
|
||||
}
|
||||
*response_bytes = total_bytes;
|
||||
true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user