fix(tui): linkify complete bare URLs with tildes (#27088)

## Background

Bare URLs containing `~` in their path are currently only clickable up
to the tilde in the interactive TUI. For example, Codex renders the
visible text for:


`https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf`

but the OSC 8 destination stops at `https://www.cs.tufts.edu/`. This
makes Cmd-click open the wrong location even though the terminal
recognizes the complete URL outside Codex.

Fixes #26774.

## Root Cause

The URL scanner already accepts `~`. The truncation happens earlier:
with strikethrough parsing enabled, `pulldown-cmark` splits this URL
into adjacent decoded `Event::Text` values around the tilde. The
Markdown renderer annotated each text event independently, so only the
first event still looked like a complete URL with a supported scheme.

The renderer now merges adjacent decoded text events before URL
annotation. It preserves the combined source range while retaining
parser-decoded contents, which avoids regressing entities such as
`&`.

## Changes

- Add a small iterator that merges adjacent decoded Markdown text events
and their source ranges.
- Apply it at the Markdown renderer boundary before hyperlink detection.
- Add regression coverage for the reported URL in prose, wrapped table
output, and entity-decoded URLs.

## How to Test

1. Run Codex with `just c`.
2. Ask the assistant to output this exact bare URL with no Markdown link
syntax:

`https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf`
3. Hold Cmd and hover or click the URL.
4. Confirm the complete URL, including the suffix after `~`, is one
destination.
5. Repeat with the URL inside a Markdown table and confirm wrapped
portions retain the same complete destination.

Targeted tests:

- `just test -p codex-tui url_with_tilde`
- `just test -p codex-tui merged_text_events_preserve_entity_decoding`

The full `codex-tui` test run was also executed. Its only failures were
the two existing Guardian feature-flag tests:

-
`app::tests::update_feature_flags_disabling_guardian_clears_review_policy_and_restores_default`
-
`app::tests::update_feature_flags_disabling_guardian_clears_manual_review_policy_without_history`
This commit is contained in:
Felipe Coury
2026-06-08 17:02:36 -07:00
committed by GitHub
Unverified
parent ffec7c0933
commit 4ca2e436e5
5 changed files with 156 additions and 1 deletions
+1
View File
@@ -153,6 +153,7 @@ mod local_chatgpt_auth;
mod markdown;
mod markdown_render;
mod markdown_stream;
mod markdown_text_merge;
mod mention_codec;
mod model_catalog;
mod model_migration;
+2 -1
View File
@@ -39,6 +39,7 @@
//! body rows, or even 3-char-wide columns cannot fit, body rows render as
//! key/value records.
use crate::markdown_text_merge::DecodedTextMerge;
use crate::render::highlight::foreground_style_for_scopes;
use crate::render::highlight::highlight_code_to_lines;
use crate::render::line_utils::line_to_static;
@@ -322,7 +323,7 @@ pub(crate) fn render_markdown_lines_with_width_and_cwd(
let mut options = Options::empty();
options.insert(Options::ENABLE_STRIKETHROUGH);
options.insert(Options::ENABLE_TABLES);
let parser = Parser::new_ext(input, options).into_offset_iter();
let parser = DecodedTextMerge::new(Parser::new_ext(input, options).into_offset_iter());
let mut w = Writer::new(input, parser, width, cwd);
w.run();
w.text
+88
View File
@@ -8,9 +8,11 @@ use std::path::Path;
use crate::markdown_render::COLON_LOCATION_SUFFIX_RE;
use crate::markdown_render::HASH_LOCATION_SUFFIX_RE;
use crate::markdown_render::render_markdown_lines_with_width_and_cwd;
use crate::markdown_render::render_markdown_text;
use crate::markdown_render::render_markdown_text_with_width;
use crate::markdown_render::render_markdown_text_with_width_and_cwd;
use insta::assert_debug_snapshot;
use insta::assert_snapshot;
fn render_markdown_text_for_cwd(input: &str, cwd: &Path) -> Text<'static> {
@@ -29,6 +31,92 @@ fn plain_lines(text: &Text<'_>) -> Vec<String> {
.collect()
}
#[test]
fn bare_url_with_tilde_keeps_complete_hyperlink() {
let destination =
"https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf";
let lines = render_markdown_lines_with_width_and_cwd(
destination,
/*width*/ Some(80),
/*cwd*/ None,
);
let rendered = lines
.iter()
.map(|line| {
let text = line
.line
.spans
.iter()
.map(|span| span.content.as_ref())
.collect::<String>();
let hyperlinks = line
.hyperlinks
.iter()
.map(|link| (link.columns.clone(), link.destination.as_str()))
.collect::<Vec<_>>();
(text, hyperlinks)
})
.collect::<Vec<_>>();
assert_debug_snapshot!(rendered);
}
#[test]
fn table_url_with_tilde_keeps_complete_hyperlink() {
let destination =
"https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf";
let markdown = format!("| URL |\n| --- |\n| {destination} |\n");
let lines = render_markdown_lines_with_width_and_cwd(
&markdown,
/*width*/ Some(32),
/*cwd*/ None,
);
let destinations = lines
.iter()
.flat_map(|line| line.hyperlinks.iter())
.map(|link| link.destination.as_str())
.collect::<Vec<_>>();
assert!(!destinations.is_empty());
assert_eq!(destinations, vec![destination; destinations.len()]);
}
#[test]
fn merged_text_events_preserve_entity_decoding() {
let source = "https://example.com/a&amp;b~c";
let destination = "https://example.com/a&b~c";
let lines = render_markdown_lines_with_width_and_cwd(
source,
/*width*/ Some(80),
/*cwd*/ None,
);
let rendered = lines
.iter()
.map(|line| {
let text = line
.line
.spans
.iter()
.map(|span| span.content.as_ref())
.collect::<String>();
let hyperlinks = line
.hyperlinks
.iter()
.map(|link| (link.columns.clone(), link.destination.as_str()))
.collect::<Vec<_>>();
(text, hyperlinks)
})
.collect::<Vec<_>>();
assert_eq!(
rendered,
vec![(
destination.to_string(),
vec![(0..destination.len(), destination)],
)],
);
}
#[test]
fn empty() {
assert_eq!(render_markdown_text(""), Text::default());
+50
View File
@@ -0,0 +1,50 @@
//! Markdown text-event merging that preserves parser-decoded contents and source offsets.
use std::iter::Peekable;
use std::ops::Range;
use pulldown_cmark::Event;
/// Merges adjacent parsed text events without reconstructing them from the Markdown source.
///
/// Markdown extensions can split visually contiguous text around delimiter characters. Keeping the
/// decoded event contents together lets downstream consumers recognize tokens that span those
/// parser boundaries while the combined source range remains available for offset-aware rendering.
pub(crate) struct DecodedTextMerge<I: Iterator> {
iter: Peekable<I>,
}
impl<I: Iterator> DecodedTextMerge<I> {
pub(crate) fn new(iter: I) -> Self {
Self {
iter: iter.peekable(),
}
}
}
impl<'a, I> Iterator for DecodedTextMerge<I>
where
I: Iterator<Item = (Event<'a>, Range<usize>)>,
{
type Item = (Event<'a>, Range<usize>);
fn next(&mut self) -> Option<Self::Item> {
let (event, mut range) = self.iter.next()?;
let Event::Text(text) = event else {
return Some((event, range));
};
if !matches!(self.iter.peek(), Some((Event::Text(_), _))) {
return Some((Event::Text(text), range));
}
let mut merged = text.into_string();
while matches!(self.iter.peek(), Some((Event::Text(_), _))) {
let Some((Event::Text(text), next_range)) = self.iter.next() else {
break;
};
merged.push_str(&text);
range.end = next_range.end;
}
Some((Event::Text(merged.into()), range))
}
}
@@ -0,0 +1,15 @@
---
source: tui/src/markdown_render_tests.rs
expression: rendered
---
[
(
"https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf",
[
(
0..72,
"https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf",
),
],
),
]