path-uri: render native paths across platforms (#27819)

## Why

We're moving to `PathUri` in more places to support cross-OS
app-server/exec-server, but we don't want to expose the URI encoding to
users of app-server's public APIs yet.

We'll need to translate at the app-server API boundary between `PathUri`
values and client-visible "regular" paths that are appropriate for the OS
of the environment in which the paths make sense. That means using the
environment's path personality to do the conversion.

`PathUri` doesn't yet attempt to encode environment ID, so for now we'll
sniff the most likely path convention for a given path.

## What

- Add `PathConvention` and `ApiPathString` with host-independent
POSIX, Windows drive, and UNC rendering.
- Cover cross-host rendering, encoding, Unicode, invalid components.
This commit is contained in:
Adam Perry @ OpenAI
2026-06-13 22:26:49 -07:00
committed by GitHub
Unverified
parent 42dec90bc4
commit 5e9249ec02
4 changed files with 1017 additions and 14 deletions
@@ -0,0 +1,399 @@
use crate::PathUri;
use codex_utils_absolute_path::AbsolutePathBuf;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use serde::Serializer;
use std::fmt;
use thiserror::Error;
use ts_rs::TS;
/// A UTF-8 path for preserving raw path compatibility at the app-server API
/// boundary while Codex migrates to [`PathUri`].
///
/// Supports storing arbitrary strings read from the API and converting to and
/// from [`PathUri`] using an explicitly selected native path convention.
///
/// When converting from [`PathUri`], "native" refers to the supplied
/// [`PathConvention`], which may be foreign to the operating system running
/// this process. The inner string is private so path-producing code must use
/// [`Self::from_abs_path`] or [`Self::from_path_uri`] instead of bypassing the
/// intended conversion boundary. Non-UTF-8 paths are converted to UTF-8
/// lossily because this API value is serialized as a JSON string.
///
/// Deserialization accepts any UTF-8 string without interpreting or validating
/// it. That unrestricted construction path is intentionally available only to
/// serde: Codex-internal code cannot construct this type directly from a raw
/// `String` and is instead encouraged to convert through [`PathUri`] or
/// [`AbsolutePathBuf`]. Relative path text remains valid until an operation
/// such as [`Self::to_path_uri`] requires an absolute path.
///
/// Serialization is implemented by hand in this module and writes the inner
/// string unchanged, so a deserialized value serializes back to the same text.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, TS)]
#[serde(transparent)]
#[ts(type = "string")]
pub struct ApiPathString(String);
impl ApiPathString {
/// Renders an absolute path using the current host's path convention.
pub fn from_abs_path(path: &AbsolutePathBuf) -> Self {
Self(path.to_string_lossy().into_owned())
}
/// Renders a path URI using the requested native path convention.
///
/// Rendering fails when the URI shape does not match the convention, such
/// as a POSIX path rendered as Windows or a UNC path rendered as POSIX. It
/// also fails when an opaque fallback does not encode an absolute path for
/// the convention. Non-UTF-8 segments are rendered lossily, and encoded
/// separators are emitted as native path text.
pub fn from_path_uri(
path: &PathUri,
convention: PathConvention,
) -> Result<Self, ApiPathStringError> {
if let Some(path_bytes) = path.opaque_fallback_bytes() {
return render_opaque_fallback(path, &path_bytes, convention).map(Self);
}
match convention {
PathConvention::Posix => render_posix_path(path),
PathConvention::Windows => render_windows_path(path),
}
.map(Self)
}
/// Parses this API string as an absolute path using the requested native
/// path convention and returns its canonical path URI.
pub fn to_path_uri(&self, convention: PathConvention) -> Result<PathUri, ApiPathStringError> {
let path = match convention {
PathConvention::Posix => parse_posix_path(&self.0),
PathConvention::Windows => parse_windows_path(&self.0),
};
path.ok_or_else(|| ApiPathStringError::InvalidNativePath {
path: self.0.clone(),
convention,
})
}
/// Infers the path convention of an absolute API path from its spelling.
///
/// Relative paths and ambiguous spellings return `None`. In particular,
/// slash-prefixed paths are treated as POSIX even when they could also be
/// interpreted as slash-delimited Windows UNC paths.
pub fn infer_absolute_path_convention(&self) -> Option<PathConvention> {
let bytes = self.0.as_bytes();
let has_windows_drive_root = matches!(
bytes,
[drive, b':', separator, ..]
if drive.is_ascii_alphabetic() && is_windows_separator_byte(*separator)
);
if has_windows_drive_root || self.0.starts_with(r"\\") {
Some(PathConvention::Windows)
} else if self.0.starts_with('/') {
Some(PathConvention::Posix)
} else {
None
}
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_string(self) -> String {
self.0
}
}
/// Parses absolute POSIX path text into a [`PathUri`].
///
/// Returns `None` when `path` does not start with `/`. Text containing a NUL
/// character is stored as an opaque fallback holding the original bytes; all
/// other text is split on `/` into URI segments.
fn parse_posix_path(path: &str) -> Option<PathUri> {
    let relative = path.strip_prefix('/')?;
    if relative.contains('\0') {
        // The opaque payload is the full original spelling, root included.
        return Some(PathUri::from_opaque_path_bytes(path.as_bytes()));
    }
    path_uri_from_segments(/*host*/ None, relative.split('/'))
}
/// Parses absolute Windows path text into a [`PathUri`].
///
/// Both `\` and `/` count as separators. Handled in order:
/// - namespace prefixes (`\\.\`, `\\?\`) and any text containing NUL become an
///   opaque UTF-16LE fallback;
/// - a rooted drive (`C:\...`) becomes a host-less URI whose first segment is
///   the drive;
/// - a UNC path (`\\host\share\...`) becomes a URI with `host` as authority,
///   falling back to the opaque form when the segments cannot form a URI.
///
/// Returns `None` for everything else, including a UNC prefix that lacks a
/// non-empty host or share.
fn parse_windows_path(path: &str) -> Option<PathUri> {
    let bytes = path.as_bytes();

    let is_namespace = matches!(
        bytes,
        [first, second, b'.' | b'?', third, ..]
            if is_windows_separator_byte(*first)
                && is_windows_separator_byte(*second)
                && is_windows_separator_byte(*third)
    );
    if is_namespace || path.contains('\0') {
        return Some(windows_opaque_path_uri(path));
    }

    match bytes {
        [drive, b':', root, ..]
            if drive.is_ascii_alphabetic() && is_windows_separator_byte(*root) =>
        {
            // The first three bytes are ASCII, so these byte offsets are
            // valid character boundaries.
            let (drive, rooted_tail) = path.split_at(2);
            let tail = &rooted_tail[1..];
            path_uri_from_segments(
                /*host*/ None,
                std::iter::once(drive).chain(tail.split(is_windows_separator_char)),
            )
        }
        [first, second, ..]
            if is_windows_separator_byte(*first) && is_windows_separator_byte(*second) =>
        {
            let mut parts = path[2..].split(is_windows_separator_char);
            let host = parts.next().filter(|host| !host.is_empty())?;
            let share = parts.next().filter(|share| !share.is_empty())?;
            path_uri_from_segments(Some(host), std::iter::once(share).chain(parts))
                .or_else(|| Some(windows_opaque_path_uri(path)))
        }
        _ => None,
    }
}
/// Builds a `file:` [`PathUri`] from an optional authority and raw path
/// segments, letting the `url` crate percent-encode each segment.
///
/// Returns `None` when the host is rejected, the path cannot be edited, or the
/// resulting URL is not accepted by `PathUri::try_from`.
///
/// NOTE(review): the `url` crate documents that `PathSegmentsMut::push`
/// ignores segments equal to `.` or `..`, so such components appear to be
/// dropped here rather than resolved or preserved — confirm that is intended
/// for inputs like `/a/../b`.
fn path_uri_from_segments<'a>(
    host: Option<&str>,
    segments: impl Iterator<Item = &'a str>,
) -> Option<PathUri> {
    let mut file_url = url::Url::parse("file:///").ok()?;
    if let Some(authority) = host {
        file_url.set_host(Some(authority)).ok()?;
    }
    {
        // Scoped so the mutable borrow of `file_url` ends before conversion.
        let mut writer = file_url.path_segments_mut().ok()?;
        writer.clear();
        segments.for_each(|segment| {
            writer.push(segment);
        });
    }
    PathUri::try_from(file_url).ok()
}
/// Wraps Windows path text in an opaque fallback URI whose payload is the
/// text encoded as UTF-16 little-endian bytes.
fn windows_opaque_path_uri(path: &str) -> PathUri {
    // A string never has more UTF-16 units than UTF-8 bytes, so this capacity
    // is an upper bound.
    let mut payload = Vec::with_capacity(path.len() * 2);
    for unit in path.encode_utf16() {
        payload.extend_from_slice(&unit.to_le_bytes());
    }
    PathUri::from_opaque_path_bytes(&payload)
}
/// Returns `true` for the two characters treated as Windows path
/// separators here: `\` and `/`.
fn is_windows_separator_char(character: char) -> bool {
    character == '\\' || character == '/'
}
/// Byte-level counterpart of the separator check: `true` for `\` and `/`.
fn is_windows_separator_byte(character: u8) -> bool {
    character == b'\\' || character == b'/'
}
/// Renders the decoded payload of an opaque fallback URI as native text.
///
/// For POSIX the payload must start with `/` and is decoded as lossy UTF-8.
/// For Windows the payload is handed to `render_windows_opaque_fallback`.
/// Any payload that does not qualify yields
/// [`ApiPathStringError::OpaqueFallback`] carrying the URI's string form.
fn render_opaque_fallback(
    path: &PathUri,
    path_bytes: &[u8],
    convention: PathConvention,
) -> Result<String, ApiPathStringError> {
    let rendered = match convention {
        PathConvention::Windows => render_windows_opaque_fallback(path_bytes),
        PathConvention::Posix => {
            if path_bytes.starts_with(b"/") {
                Some(String::from_utf8_lossy(path_bytes).into_owned())
            } else {
                None
            }
        }
    };
    match rendered {
        Some(text) => Ok(text),
        None => Err(ApiPathStringError::OpaqueFallback {
            path: path.to_string(),
        }),
    }
}
/// Decodes an opaque payload as UTF-16LE Windows path text.
///
/// Returns `None` when the byte length is odd or when the decoded text is not
/// absolute. Absolute means either a rooted drive prefix (`C:\`) or two
/// leading separators (`\\server`, `\\.\`, `\\?\`); `/` is accepted wherever
/// `\` is. Unpaired surrogates are replaced during the lossy conversion.
fn render_windows_opaque_fallback(path_bytes: &[u8]) -> Option<String> {
    if path_bytes.len() % 2 != 0 {
        return None;
    }
    let units: Vec<u16> = path_bytes
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    let is_absolute = match units.as_slice() {
        // Namespace or UNC root.
        [first, second, ..]
            if is_windows_separator(*first) && is_windows_separator(*second) =>
        {
            true
        }
        // Rooted drive: ASCII letter, colon, separator.
        [drive, colon, root, ..] => {
            u8::try_from(*drive).is_ok_and(|letter| letter.is_ascii_alphabetic())
                && *colon == u16::from(b':')
                && is_windows_separator(*root)
        }
        _ => false,
    };

    if is_absolute {
        Some(String::from_utf16_lossy(&units))
    } else {
        None
    }
}
/// UTF-16 code-unit counterpart of the separator check: `true` for `\` and `/`.
fn is_windows_separator(character: u16) -> bool {
    [b'\\', b'/']
        .iter()
        .any(|separator| character == u16::from(*separator))
}
impl fmt::Display for ApiPathString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl Serialize for ApiPathString {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&self.0)
}
}
/// Describes the type to schema consumers as an ordinary string named
/// `ApiPathString`.
impl JsonSchema for ApiPathString {
    fn schema_name() -> String {
        String::from("ApiPathString")
    }

    fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        // Reuse the schema of `String`.
        <String as JsonSchema>::json_schema(generator)
    }
}
/// Renders a non-opaque path URI as POSIX path text.
///
/// Fails with `IncompatibleConvention` when the URI has a host: POSIX paths
/// have no UNC authority, so `file://server/share` could only become `/share`
/// by dropping the server identity.
fn render_posix_path(path: &PathUri) -> Result<String, ApiPathStringError> {
    let url = path.to_url();
    if url.host_str().is_some() {
        return Err(incompatible_convention(path, PathConvention::Posix));
    }
    // Segments are already `/`-delimited in the URI. Each one is decoded on
    // its own, so `file:///a%20dir/file` turns into `/a dir/file`.
    let rendered = path_segments(&url).fold(String::new(), |mut text, segment| {
        text.push('/');
        text.push_str(&decode_native_segment(segment));
        text
    });
    Ok(rendered)
}
/// Renders a non-opaque path URI as Windows path text.
///
/// With a URI authority the result is a UNC path: `file://server/share/file`
/// becomes `\\server\share\file`, and the share segment must be non-empty.
/// Without an authority the first segment must be a drive such as `C:`:
/// `file:///C:/src/main.rs` becomes `C:\src\main.rs`, while a POSIX-shaped
/// URI such as `file:///usr/bin` is rejected. Every rejection is reported as
/// `IncompatibleConvention`.
fn render_windows_path(path: &PathUri) -> Result<String, ApiPathStringError> {
    let url = path.to_url();
    let mut remaining = path_segments(&url);
    let reject = || incompatible_convention(path, PathConvention::Windows);

    // Both shapes need a leading segment: the share name or the drive.
    let Some(first) = remaining.next() else {
        return Err(reject());
    };
    let first = decode_native_segment(first);

    let mut rendered = match url.host_str() {
        Some(host) => {
            if first.is_empty() {
                return Err(reject());
            }
            format!(r"\\{host}\{first}")
        }
        None => {
            let is_drive = matches!(
                first.as_bytes(),
                [letter, b':'] if letter.is_ascii_alphabetic()
            );
            if !is_drive {
                return Err(reject());
            }
            first
        }
    };

    // Each remaining URI segment is decoded and joined with a backslash.
    for segment in remaining {
        rendered.push('\\');
        rendered.push_str(&decode_native_segment(segment));
    }

    // `file:///C:` and `file:///C:/` both mean the drive root, never the
    // drive-relative path `C:`.
    if matches!(rendered.as_bytes(), [_, b':']) {
        rendered.push('\\');
    }
    Ok(rendered)
}
/// Returns the `/`-separated path segments of `url`.
///
/// Panics via `unreachable!` if the URL has no path segments; callers here
/// pass URLs obtained from a `PathUri`, which the panic message describes as
/// validated file URLs.
fn path_segments(url: &url::Url) -> std::str::Split<'_, char> {
    match url.path_segments() {
        Some(segments) => segments,
        None => unreachable!("validated file URLs have path segments"),
    }
}
/// Percent-decodes one URI path segment into display text.
///
/// Decoding happens exactly once: `%20` becomes a space, while `%252F`
/// becomes the literal text `%2F` and is not decoded again into `/`. Bytes
/// that are not valid UTF-8 after decoding are replaced lossily.
fn decode_native_segment(segment: &str) -> String {
    let decoded = urlencoding::decode_binary(segment.as_bytes());
    let text = String::from_utf8_lossy(&decoded);
    text.into_owned()
}
/// Builds the `IncompatibleConvention` error for `path`, recording the URI's
/// string form together with the convention that could not represent it.
fn incompatible_convention(path: &PathUri, convention: PathConvention) -> ApiPathStringError {
    let uri_text = path.to_string();
    ApiPathStringError::IncompatibleConvention {
        path: uri_text,
        convention,
    }
}
/// Failures produced when converting between [`ApiPathString`] and
/// [`PathUri`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum ApiPathStringError {
/// The URI is an opaque fallback whose payload is not an absolute path in
/// the requested convention. `path` is the URI's string form.
#[error("opaque fallback path URI `{path}` cannot be recovered as a native path")]
OpaqueFallback { path: String },
/// The URI's shape (authority or leading segment) cannot be expressed in
/// the requested convention. `path` is the URI's string form.
#[error("path URI `{path}` cannot be rendered using {convention} path syntax")]
IncompatibleConvention {
path: String,
convention: PathConvention,
},
/// The API text is not an absolute path under the requested convention.
/// `path` is the original API text.
#[error("path `{path}` is not absolute using {convention} path syntax")]
InvalidNativePath {
path: String,
convention: PathConvention,
},
}
/// Path syntax used to render a [`PathUri`] as an operating-system path.
///
/// This describes path grammar rather than a specific operating system because
/// Linux and macOS share the POSIX representation relevant here.
///
/// Serialized with serde as the snake_case variant name (`rename_all`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(rename_all = "snake_case")]
pub enum PathConvention {
/// `/`-rooted paths with `/` separators.
Posix,
/// Drive-rooted (`C:\`) and UNC (`\\server\share`) paths.
Windows,
}
impl PathConvention {
/// Returns the path convention used by the current process.
#[cfg(windows)]
pub const fn native() -> Self {
Self::Windows
}
/// Returns the path convention used by the current process.
#[cfg(unix)]
pub const fn native() -> Self {
Self::Posix
}
}
/// Displays the human-readable convention name used in error messages.
impl fmt::Display for PathConvention {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Posix => "POSIX",
            Self::Windows => "Windows",
        };
        f.write_str(label)
    }
}
#[cfg(test)]
#[path = "api_path_string_tests.rs"]
mod tests;
@@ -0,0 +1,498 @@
use super::*;
use crate::PathUri;
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
/// One row of the shared rendering table: a URI, the convention to render it
/// with, and what that rendering should produce.
#[derive(Clone, Copy, Debug)]
struct RenderCase {
// URI text handed to `PathUri::parse`.
uri: &'static str,
// Convention passed to `ApiPathString::from_path_uri`.
convention: PathConvention,
// Expected rendering, or the expected error kind.
expected: RenderExpectation,
}
impl RenderCase {
const fn round_trips(
uri: &'static str,
convention: PathConvention,
rendered: &'static str,
) -> Self {
Self {
uri,
convention,
expected: RenderExpectation::RoundTrip(rendered),
}
}
const fn rejects(uri: &'static str, convention: PathConvention, error: ExpectedError) -> Self {
Self {
uri,
convention,
expected: RenderExpectation::Error(error),
}
}
const fn renders_lossily(
uri: &'static str,
convention: PathConvention,
rendered: &'static str,
) -> Self {
Self {
uri,
convention,
expected: RenderExpectation::RenderOnly(rendered),
}
}
}
/// Outcome a [`RenderCase`] expects from rendering its URI.
#[derive(Clone, Copy, Debug)]
enum RenderExpectation {
// Renders to this text, and the text must parse back to the same URI.
RoundTrip(&'static str),
// Renders to this text; the reverse direction is not checked.
RenderOnly(&'static str),
// Rendering must fail with this kind of error.
Error(ExpectedError),
}
/// Error kinds a [`RenderCase`] can expect; the test expands each into the
/// matching `ApiPathStringError` variant with the case's URI filled in.
#[derive(Clone, Copy, Debug)]
enum ExpectedError {
// Maps to `ApiPathStringError::OpaqueFallback`.
OpaqueFallback,
// Maps to `ApiPathStringError::IncompatibleConvention`.
IncompatibleConvention,
}
const RENDER_CASES: &[RenderCase] = &[
// POSIX paths.
RenderCase::round_trips("file:///", PathConvention::Posix, "/"),
RenderCase::round_trips(
"file:///home/alice/src/main.rs",
PathConvention::Posix,
"/home/alice/src/main.rs",
),
RenderCase::round_trips(
"file:///home/alice/a%20file.rs",
PathConvention::Posix,
"/home/alice/a file.rs",
),
RenderCase::round_trips(
"file:///workspace/src/lib.rs",
PathConvention::Posix,
"/workspace/src/lib.rs",
),
RenderCase::round_trips(
"file:///workspace/tests/test.rs",
PathConvention::Posix,
"/workspace/tests/test.rs",
),
RenderCase::round_trips("file:///etc", PathConvention::Posix, "/etc"),
RenderCase::round_trips("file:///tmp/", PathConvention::Posix, "/tmp/"),
RenderCase::round_trips("file:///C:/Project", PathConvention::Posix, "/C:/Project"),
RenderCase::round_trips("file:///C:", PathConvention::Posix, "/C:"),
RenderCase::round_trips("file:///tmp/%E2%98%83", PathConvention::Posix, "/tmp/☃"),
RenderCase::round_trips("file:///tmp/a%5Cb", PathConvention::Posix, "/tmp/a\\b"),
RenderCase::round_trips(
"file:///tmp/100%25/file",
PathConvention::Posix,
"/tmp/100%/file",
),
RenderCase::round_trips(
"file:///tmp/a%3Fb%23c%25d",
PathConvention::Posix,
"/tmp/a?b#c%d",
),
RenderCase::round_trips("file:///tmp/a%252Fb", PathConvention::Posix, "/tmp/a%2Fb"),
RenderCase::round_trips(
"file:///bad/path/L3RtcC9udWxsLQAt_y1ieXRl",
PathConvention::Posix,
"/bad/path/L3RtcC9udWxsLQAt_y1ieXRl",
),
RenderCase::round_trips(
"FILE:///workspace/src",
PathConvention::Posix,
"/workspace/src",
),
RenderCase::round_trips(
"file:/workspace/src",
PathConvention::Posix,
"/workspace/src",
),
RenderCase::round_trips(
"file://localhost/workspace/src",
PathConvention::Posix,
"/workspace/src",
),
RenderCase::round_trips(
"file://LOCALHOST/workspace/src",
PathConvention::Posix,
"/workspace/src",
),
// Windows drive paths.
RenderCase::round_trips(
"file:///C:/Users/Alice%20Smith/src/main.rs",
PathConvention::Windows,
r"C:\Users\Alice Smith\src\main.rs",
),
RenderCase::round_trips("file:///C:/", PathConvention::Windows, "C:\\"),
RenderCase::renders_lossily("file:///C:", PathConvention::Windows, "C:\\"),
RenderCase::round_trips("file:///C:/Users", PathConvention::Windows, r"C:\Users"),
RenderCase::round_trips("file:///C:/Windows", PathConvention::Windows, r"C:\Windows"),
RenderCase::round_trips(
"file:///d:/snowman/%E2%98%83",
PathConvention::Windows,
r"d:\snowman\☃",
),
RenderCase::round_trips("file:///C:/tmp/", PathConvention::Windows, "C:\\tmp\\"),
RenderCase::round_trips(
"file:///C:/test%20with%20%25/path",
PathConvention::Windows,
r"C:\test with %\path",
),
RenderCase::round_trips(
"file:///C:/test%20with%20%2525/c%23code",
PathConvention::Windows,
r"C:\test with %25\c#code",
),
RenderCase::round_trips(
"file:///C:/Source/Z%C3%BCrich%20or%20Zurich%20(%CB%88zj%CA%8A%C9%99r%C9%AAk,/Code/resources/app/plugins/c%23/plugin.json",
PathConvention::Windows,
r"C:\Source\Zürich or Zurich (ˈzjʊərɪk,\Code\resources\app\plugins\c#\plugin.json",
),
RenderCase::round_trips(
"file:///C:/project/owner's_file/database.sqlite",
PathConvention::Windows,
r"C:\project\owner's_file\database.sqlite",
),
RenderCase::round_trips(
"file:///C:/project/%25A0.txt",
PathConvention::Windows,
r"C:\project\%A0.txt",
),
RenderCase::round_trips(
"file:///C:/project/%252e.txt",
PathConvention::Windows,
r"C:\project\%2e.txt",
),
// Windows UNC paths.
RenderCase::round_trips(
"file://server/share/src/main.rs",
PathConvention::Windows,
r"\\server\share\src\main.rs",
),
RenderCase::round_trips(
"file://server/share",
PathConvention::Windows,
r"\\server\share",
),
RenderCase::round_trips(
"file://server/share/",
PathConvention::Windows,
"\\\\server\\share\\",
),
RenderCase::round_trips(
"file://shares/files/c%23/p.cs",
PathConvention::Windows,
r"\\shares\files\c#\p.cs",
),
RenderCase::round_trips(
"file://monacotools1/certificates/SSL/",
PathConvention::Windows,
"\\\\monacotools1\\certificates\\SSL\\",
),
// Opaque fallbacks rendered according to their source convention.
RenderCase::renders_lossily(
"file:///%00/bad/path/L3RtcC9udWxsLQAt_y1ieXRl",
PathConvention::Posix,
"/tmp/null-\0--byte",
),
RenderCase::round_trips(
"file:///%00/bad/path/XABcAC4AXABDAE8ATQAxAFwA",
PathConvention::Windows,
r"\\.\COM1\",
),
RenderCase::round_trips(
"file:///%00/bad/path/XABcAD8AXABWAG8AbAB1AG0AZQB7ADAAMAAwADAAMAAwADAAMAAtADAAMAAwADAALQAwADAAMAAwAC0AMAAwADAAMAAtADAAMAAwADAAMAAwADAAMAAwADAAMAAwAH0AXABmAGkAbABlAC4AcgBzAA",
PathConvention::Windows,
r"\\?\Volume{00000000-0000-0000-0000-000000000000}\file.rs",
),
// Windows rendering preserves path text without filesystem validation.
RenderCase::round_trips("file:///C:/a%3Fb", PathConvention::Windows, "C:\\a?b"),
RenderCase::round_trips("file:///C:/a*b", PathConvention::Windows, "C:\\a*b"),
RenderCase::round_trips(
"file:///C:/trailing.",
PathConvention::Windows,
"C:\\trailing.",
),
RenderCase::round_trips(
"file:///C:/trailing%20",
PathConvention::Windows,
"C:\\trailing ",
),
RenderCase::round_trips(
"file:///C:/control-%01",
PathConvention::Windows,
"C:\\control-\u{1}",
),
RenderCase::round_trips(
"file:///C:/file.txt:stream",
PathConvention::Windows,
"C:\\file.txt:stream",
),
RenderCase::round_trips(
"file://server/sh%3Fare/file.rs",
PathConvention::Windows,
"\\\\server\\sh?are\\file.rs",
),
// These renderings intentionally lose URI byte or segment boundaries.
RenderCase::renders_lossily(
"file:///tmp/non-utf8-%FF",
PathConvention::Posix,
"/tmp/non-utf8-",
),
RenderCase::renders_lossily(
"file:///tmp/non-utf8-%A0",
PathConvention::Posix,
"/tmp/non-utf8-",
),
RenderCase::renders_lossily("file:///tmp/a%2Fb", PathConvention::Posix, "/tmp/a/b"),
RenderCase::renders_lossily("file:///C:/a%2Fb", PathConvention::Windows, "C:\\a/b"),
RenderCase::renders_lossily("file:///C:/a%5Cb", PathConvention::Windows, "C:\\a\\b"),
// URI shapes that do not match the requested convention.
RenderCase::rejects(
"file://server/share/file.txt",
PathConvention::Posix,
ExpectedError::IncompatibleConvention,
),
RenderCase::rejects(
"file://server/share/file.rs",
PathConvention::Posix,
ExpectedError::IncompatibleConvention,
),
RenderCase::rejects(
"file:///usr/local/file.txt",
PathConvention::Windows,
ExpectedError::IncompatibleConvention,
),
RenderCase::rejects(
"file:///home/alice/file.rs",
PathConvention::Windows,
ExpectedError::IncompatibleConvention,
),
RenderCase::rejects(
"file://server/",
PathConvention::Windows,
ExpectedError::IncompatibleConvention,
),
RenderCase::rejects(
"file:///_:/path",
PathConvention::Windows,
ExpectedError::IncompatibleConvention,
),
// Invalid opaque fallback payloads.
RenderCase::rejects(
"file:///%00/bad/path/YQ",
PathConvention::Posix,
ExpectedError::OpaqueFallback,
),
RenderCase::rejects(
"file:///%00/bad/path/L3RtcC9udWxsLQAt_y1ieXRl",
PathConvention::Windows,
ExpectedError::OpaqueFallback,
),
];
/// Drives every entry of `RENDER_CASES`: checks the rendering (or error),
/// checks that a successful rendering infers back to the case's convention,
/// and for round-trip cases checks that the text parses back to the same URI
/// and renders to the same text again.
#[test]
fn renders_native_paths_from_shared_cases() {
    for case in RENDER_CASES.iter() {
        let uri = PathUri::parse(case.uri).expect("valid file URI");

        let expected = match case.expected {
            RenderExpectation::RoundTrip(text) | RenderExpectation::RenderOnly(text) => {
                Ok(ApiPathString(text.to_string()))
            }
            RenderExpectation::Error(kind) => Err(match kind {
                ExpectedError::OpaqueFallback => ApiPathStringError::OpaqueFallback {
                    path: uri.to_string(),
                },
                ExpectedError::IncompatibleConvention => {
                    ApiPathStringError::IncompatibleConvention {
                        path: uri.to_string(),
                        convention: case.convention,
                    }
                }
            }),
        };

        let actual = ApiPathString::from_path_uri(&uri, case.convention);
        assert_eq!(actual, expected, "rendering {case:?}");

        if let Ok(native) = &actual {
            assert_eq!(
                native.infer_absolute_path_convention(),
                Some(case.convention),
                "inferring {case:?}"
            );
        }

        // Only round-trip cases exercise the reverse direction.
        let RenderExpectation::RoundTrip(text) = case.expected else {
            continue;
        };
        let api_path = serde_json::from_value::<ApiPathString>(serde_json::json!(text))
            .expect("native path should deserialize from API text");
        let reparsed = api_path
            .to_path_uri(case.convention)
            .expect("native path should parse using its convention");
        assert_eq!(reparsed, uri, "parsing {case:?}");
        assert_eq!(
            ApiPathString::from_path_uri(&reparsed, case.convention),
            Ok(api_path),
            "round-tripping {case:?}"
        );
    }
}
/// Relative text survives a deserialize/serialize cycle untouched.
#[test]
fn relative_api_path_serializes_and_deserializes_unchanged() {
    let samples = [".", "subdir", "subdir/file.rs"];
    for raw_path in samples {
        let parsed: ApiPathString = serde_json::from_value(serde_json::json!(raw_path))
            .expect("relative API path should deserialize");
        let reserialized =
            serde_json::to_value(parsed).expect("relative API path should serialize");
        assert_eq!(reserialized, serde_json::json!(raw_path));
    }
}
/// A relative path deserializes, has no inferable convention, and is rejected
/// once it must become a path URI.
#[test]
fn relative_api_path_is_invalid_when_converted_to_a_path_uri() {
    let raw_path = "subdir";
    let api_path: ApiPathString = serde_json::from_value(serde_json::json!(raw_path))
        .expect("relative API path should deserialize");

    assert_eq!(api_path.infer_absolute_path_convention(), None);

    let expected_error = ApiPathStringError::InvalidNativePath {
        path: raw_path.to_string(),
        convention: PathConvention::Posix,
    };
    assert_eq!(
        api_path.to_path_uri(PathConvention::Posix),
        Err(expected_error)
    );
}
/// Windows-style relative and drive-relative text is rejected as well.
#[test]
fn other_non_absolute_api_paths_cannot_be_converted_to_path_uris() {
    let cases = [
        (r"workspace\file.rs", PathConvention::Windows),
        (r"C:file.rs", PathConvention::Windows),
    ];
    for (raw_path, convention) in cases {
        let api_path: ApiPathString = serde_json::from_value(serde_json::json!(raw_path))
            .expect("API path should deserialize without validation");

        assert_eq!(api_path.infer_absolute_path_convention(), None);

        let expected_error = ApiPathStringError::InvalidNativePath {
            path: raw_path.to_string(),
            convention,
        };
        assert_eq!(api_path.to_path_uri(convention), Err(expected_error));
    }
}
/// Table of raw API text against the convention inferred from its spelling.
#[test]
fn infers_absolute_path_conventions_from_api_text() {
    const WINDOWS: Option<PathConvention> = Some(PathConvention::Windows);
    const POSIX: Option<PathConvention> = Some(PathConvention::Posix);

    let cases = [
        (r"C:\workspace\file.rs", WINDOWS),
        ("c:/workspace/file.rs", WINDOWS),
        (r"\\server\share\file.rs", WINDOWS),
        (r"\\?\C:\workspace\file.rs", WINDOWS),
        (r"\\.\COM1", WINDOWS),
        ("/workspace/file.rs", POSIX),
        ("/C:/workspace/file.rs", POSIX),
        ("//server/share/file.rs", POSIX),
        ("", None),
        (".", None),
        ("subdir/file.rs", None),
        (r"subdir\file.rs", None),
        (r"C:file.rs", None),
        (r"\rooted-without-drive", None),
    ];
    for (raw_path, expected) in cases {
        let api_path: ApiPathString = serde_json::from_value(serde_json::json!(raw_path))
            .expect("API path should deserialize without validation");
        let inferred = api_path.infer_absolute_path_convention();
        assert_eq!(inferred, expected, "inferring {raw_path:?}");
    }
}
/// Absolute text of either convention is stored verbatim on any host and
/// still infers its own convention.
#[test]
fn foreign_absolute_syntax_deserializes_without_host_interpretation() {
    let cases = [
        (r"C:\workspace\file.rs", PathConvention::Windows),
        ("/workspace/file.rs", PathConvention::Posix),
    ];
    for (raw_path, convention) in cases {
        let api_path: ApiPathString = serde_json::from_value(serde_json::json!(raw_path))
            .expect("foreign API path should deserialize");
        assert_eq!(api_path.as_str(), raw_path);
        assert_eq!(
            api_path.infer_absolute_path_convention(),
            Some(convention)
        );
    }
}
/// `from_abs_path` reproduces the host's own spelling of an absolute path.
#[test]
fn renders_an_absolute_path_using_the_host_convention() {
    #[cfg(unix)]
    const NATIVE_PATH: &str = "/workspace/a file.rs";
    #[cfg(windows)]
    const NATIVE_PATH: &str = r"C:\workspace\a file.rs";

    let absolute = AbsolutePathBuf::from_absolute_path_checked(NATIVE_PATH)
        .expect("native path should be absolute");
    let expected = ApiPathString(NATIVE_PATH.to_string());
    assert_eq!(ApiPathString::from_abs_path(&absolute), expected);
}
/// A Windows path ending in an unpaired surrogate renders lossily: the
/// surrogate becomes U+FFFD both through `from_abs_path` and through the
/// path URI rendered as Windows, while rendering that URI as POSIX is
/// rejected.
#[cfg(windows)]
#[test]
fn renders_native_non_unicode_windows_fallback_lossily() {
    use std::os::windows::ffi::OsStringExt;

    // `C:\bad\` followed by the lone surrogate 0xD800, which is not valid
    // Unicode.
    let native_path = std::path::PathBuf::from(std::ffi::OsString::from_wide(
        &r"C:\bad\"
            .encode_utf16()
            .chain([0xd800])
            .collect::<Vec<_>>(),
    ));
    let native_path =
        AbsolutePathBuf::from_absolute_path_checked(native_path).expect("absolute native path");

    // Lossy conversion replaces the unpaired surrogate with U+FFFD. The escape
    // keeps the replacement character visible in the expectation.
    let lossy_text = "C:\\bad\\\u{FFFD}";

    assert_eq!(
        ApiPathString::from_abs_path(&native_path),
        ApiPathString(lossy_text.to_string())
    );
    let path = PathUri::from_abs_path(&native_path);
    assert_eq!(
        ApiPathString::from_path_uri(&path, PathConvention::Windows),
        Ok(ApiPathString(lossy_text.to_string()))
    );
    assert_eq!(
        ApiPathString::from_path_uri(&path, PathConvention::Posix),
        Err(ApiPathStringError::OpaqueFallback {
            path: path.to_string(),
        })
    );
}
/// The JSON form is a bare string, and reading it back gives an equal value.
#[test]
fn serializes_and_deserializes_as_a_string() {
    let uri = PathUri::parse("file:///workspace/src/lib.rs").expect("valid file URI");
    let rendered = ApiPathString::from_path_uri(&uri, PathConvention::Posix)
        .expect("POSIX URI should render");

    let json = serde_json::to_string(&rendered).expect("rendered path should serialize");
    assert_eq!(json, r#""/workspace/src/lib.rs""#);

    let reread: ApiPathString = serde_json::from_str(&json)
        .expect("rendered path should deserialize from a string");
    assert_eq!(reread, rendered);
}
+84 -14
View File
@@ -17,6 +17,12 @@ use thiserror::Error;
use ts_rs::TS;
use url::Url;
mod api_path_string;
pub use api_path_string::ApiPathString;
pub use api_path_string::ApiPathStringError;
pub use api_path_string::PathConvention;
pub const FILE_SCHEME: &str = "file";
const BAD_PATH_URI_PREFIX: &str = "file:///%00/bad/path/";
@@ -29,15 +35,14 @@ const BAD_PATH_URI_PREFIX: &str = "file:///%00/bad/path/";
/// created by [`Self::from_abs_path`] are opaque to these lexical operations.
///
/// `file:` paths retain their URI spelling so they can be parsed independently
/// of the current host. In particular, `/C:/src` remains ambiguous between a
/// Windows drive path and a valid POSIX path until [`Self::to_abs_path`]
/// applies the current host's rules. A local POSIX `file:` URI can also retain
/// of the current host. A local POSIX `file:` URI can also retain
/// percent-encoded non-UTF-8 bytes for lossless native round trips.
///
/// Like [VS Code resources], path operations use `/` URI separators on every
/// host. They preserve a URL authority but do not infer Windows drive or UNC
/// roots from path text. Native path normalization, filesystem aliases,
/// symlinks, case sensitivity, and Unicode normalization are not resolved.
/// host. Lexical path operations preserve a URL authority without interpreting
/// Windows drive or UNC roots from path text. Native path normalization,
/// filesystem aliases, symlinks, case sensitivity, and Unicode normalization
/// are not resolved.
///
/// Serde represents a `PathUri` as its canonical URI string. Deserialization
/// also accepts an absolute native path for compatibility with fields that
@@ -76,22 +81,24 @@ impl PathUri {
}
#[cfg(unix)]
let encoded_path = {
let path_bytes = {
use std::os::unix::ffi::OsStrExt;
base64::engine::general_purpose::URL_SAFE_NO_PAD
.encode(path.as_path().as_os_str().as_bytes())
path.as_path().as_os_str().as_bytes().to_vec()
};
#[cfg(windows)]
let encoded_path = {
let path_bytes = {
use std::os::windows::ffi::OsStrExt;
let path_bytes = path
.as_path()
path.as_path()
.as_os_str()
.encode_wide()
.flat_map(u16::to_le_bytes)
.collect::<Vec<_>>();
base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(path_bytes)
.collect::<Vec<_>>()
};
Self::from_opaque_path_bytes(&path_bytes)
}
fn from_opaque_path_bytes(path_bytes: &[u8]) -> Self {
let encoded_path = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(path_bytes);
let Ok(uri) = Self::parse(&format!("{BAD_PATH_URI_PREFIX}{encoded_path}")) else {
unreachable!("URL-safe base64 always produces a valid fallback path URI");
};
@@ -117,6 +124,46 @@ impl PathUri {
self.0.path()
}
/// Returns the payload bytes that `decode_bad_path_uri` extracts from this
/// URI, or `None` when it extracts nothing. Callers in this crate treat a
/// `Some` result as meaning the URI is an opaque fallback.
fn opaque_fallback_bytes(&self) -> Option<Vec<u8>> {
decode_bad_path_uri(&self.0)
}
/// Infers the native path convention represented by this URI.
///
/// A URI authority is treated as a Windows UNC host, and a leading
/// drive-letter segment such as `C:` is treated as a Windows drive. All
/// other ordinary file URIs are treated as POSIX paths. This deliberately
/// classifies `file:///C:/src` as Windows even though `/C:/src` is also a
/// valid POSIX path. In practice, POSIX paths with a drive-shaped first
/// component are rare enough that recognizing foreign Windows paths is the
/// more useful default.
///
/// Opaque fallback URIs are inspected for an absolute POSIX byte prefix or
/// an absolute Windows UTF-16LE prefix. `None` is returned when their
/// payload does not identify either convention.
///
/// TODO(anp): Once `PathUri` carries an environment identifier, prefer the
/// environment's declared convention over this spelling-based heuristic.
pub fn infer_path_convention(&self) -> Option<PathConvention> {
if let Some(path_bytes) = self.opaque_fallback_bytes() {
return infer_opaque_path_convention(&path_bytes);
}
if self.0.host_str().is_some() {
return Some(PathConvention::Windows);
}
let has_windows_drive = self
.0
.path_segments()
.and_then(|mut segments| segments.find(|segment| !segment.is_empty()))
.is_some_and(is_windows_drive_uri_segment);
if has_windows_drive {
Some(PathConvention::Windows)
} else {
Some(PathConvention::Posix)
}
}
/// Returns the decoded final URI path segment, or `None` for the URI root
/// or an opaque fallback URI created by [`Self::from_abs_path`].
///
@@ -377,6 +424,29 @@ fn decode_bad_path_uri(url: &Url) -> Option<Vec<u8>> {
.then_some(path_bytes)
}
/// Returns `true` when `segment` is exactly an ASCII letter followed by `:`,
/// i.e. a drive spelling such as `C:`.
fn is_windows_drive_uri_segment(segment: &str) -> bool {
    let bytes = segment.as_bytes();
    bytes.len() == 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':'
}
/// Classifies the decoded payload of an opaque fallback URI.
///
/// - A payload starting with the byte `/` is POSIX.
/// - Otherwise the payload is read as UTF-16LE and is Windows only when it
///   starts with an absolute prefix: a rooted drive (`C:\` or `C:/`) or two
///   leading separators (`\\`, `\/`).
/// - Anything else, including an odd-length payload, is `None`.
///
/// The Windows test matches what `render_windows_opaque_fallback` in
/// `api_path_string` accepts, so a payload inferred as Windows here can also
/// be rendered as Windows. In particular a drive-relative payload such as
/// `C:file` is not classified: it is not absolute and cannot be rendered.
///
/// NOTE(review): a UTF-16LE payload whose first character is `/` (for example
/// `//./COM1`) begins with the byte `/` and is therefore reported as POSIX;
/// the two encodings cannot be told apart from the bytes alone.
fn infer_opaque_path_convention(path_bytes: &[u8]) -> Option<PathConvention> {
    if path_bytes.starts_with(b"/") {
        return Some(PathConvention::Posix);
    }
    if !path_bytes.len().is_multiple_of(2) {
        return None;
    }
    let is_separator = |unit: u16| unit == u16::from(b'\\') || unit == u16::from(b'/');
    let mut path_wide = path_bytes
        .chunks_exact(2)
        .map(|bytes| u16::from_le_bytes([bytes[0], bytes[1]]));
    let first = path_wide.next()?;
    let second = path_wide.next()?;
    // Evaluated before the drive check so the iterator is only advanced a
    // third time when a drive prefix is actually present.
    let has_unc_or_namespace_root = is_separator(first) && is_separator(second);
    let has_drive_root = u8::try_from(first).is_ok_and(|drive| drive.is_ascii_alphabetic())
        && second == u16::from(b':')
        // A drive needs a root separator to be absolute.
        && path_wide.next().is_some_and(is_separator);
    (has_drive_root || has_unc_or_namespace_root).then_some(PathConvention::Windows)
}
/// Rejects URI metadata that has no defined meaning for `file:` URIs.
fn validate_common_known_uri(url: &Url) -> Result<(), PathUriParseError> {
if !url.username().is_empty() || url.password().is_some() {
+36
View File
@@ -58,6 +58,42 @@ fn file_uri_parses_a_windows_path_on_any_host() {
);
}
/// Table of URIs against the convention inferred from their shape.
#[test]
fn infers_path_conventions_from_uri_shape() {
    const POSIX: Option<PathConvention> = Some(PathConvention::Posix);
    const WINDOWS: Option<PathConvention> = Some(PathConvention::Windows);

    let cases = [
        ("file:///", POSIX),
        ("file:///home/alice/src", POSIX),
        ("file:///C:/Users/Alice/src", WINDOWS),
        ("file:///d:", WINDOWS),
        ("file://server/share/src", WINDOWS),
        // Opaque fallback for POSIX bytes `/tmp/null-\0-\xff-byte`.
        ("file:///%00/bad/path/L3RtcC9udWxsLQAt_y1ieXRl", POSIX),
        // Opaque fallback for Windows UTF-16LE `\\.\COM1\`.
        ("file:///%00/bad/path/XABcAC4AXABDAE8ATQAxAFwA", WINDOWS),
        ("file:///%00/bad/path/YQ", None),
    ];
    for (uri, expected) in cases {
        let parsed = PathUri::parse(uri).expect("valid path URI");
        let inferred = parsed.infer_path_convention();
        assert_eq!(inferred, expected, "inferring {uri}");
    }
}
/// Pins the documented trade-off for drive-shaped first segments.
#[test]
fn drive_shaped_posix_uri_is_intentionally_inferred_as_windows() {
    let uri = PathUri::parse("file:///C:/actually/a/posix/path").expect("valid path URI");
    // `/C:/...` is a legal POSIX path, yet the spelling is uncommon. Reading it
    // as a Windows drive lets callers render the far more common foreign
    // Windows URI without carrying its source convention separately.
    let inferred = uri.infer_path_convention();
    assert_eq!(inferred, Some(PathConvention::Windows));
}
#[cfg(windows)]
#[test]
fn file_uri_falls_back_for_windows_prefixes_without_a_uri_representation() {