From 4ca2e436e50951baa1ea74246fe09bbbff1fda4a Mon Sep 17 00:00:00 2001 From: Felipe Coury Date: Mon, 8 Jun 2026 17:02:36 -0700 Subject: [PATCH] fix(tui): linkify complete bare URLs with tildes (#27088) ## Background Bare URLs containing `~` in their path are currently only clickable up to the tilde in the interactive TUI. For example, Codex renders the visible text for: `https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf` but the OSC 8 destination stops at `https://www.cs.tufts.edu/`. This makes Cmd-click open the wrong location even though the terminal recognizes the complete URL outside Codex. Fixes #26774. ## Root Cause The URL scanner already accepts `~`. The truncation happens earlier: with strikethrough parsing enabled, `pulldown-cmark` splits this URL into adjacent decoded `Event::Text` values around the tilde. The Markdown renderer annotated each text event independently, so only the first event still looked like a complete URL with a supported scheme. The renderer now merges adjacent decoded text events before URL annotation. It preserves the combined source range while retaining parser-decoded contents, which avoids regressing entities such as `&`. ## Changes - Add a small iterator that merges adjacent decoded Markdown text events and their source ranges. - Apply it at the Markdown renderer boundary before hyperlink detection. - Add regression coverage for the reported URL in prose, wrapped table output, and entity-decoded URLs. ## How to Test 1. Run Codex with `just c`. 2. Ask the assistant to output this exact bare URL with no Markdown link syntax: `https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf` 3. Hold Cmd and hover or click the URL. 4. Confirm the complete URL, including the suffix after `~`, is one destination. 5. Repeat with the URL inside a Markdown table and confirm wrapped portions retain the same complete destination. Targeted tests: - `just test -p codex-tui url_with_tilde` - `just test -p codex-tui merged_text_events_preserve_entity_decoding` The full `codex-tui` test run was also executed. Its only failures were the two existing Guardian feature-flag tests: - `app::tests::update_feature_flags_disabling_guardian_clears_review_policy_and_restores_default` - `app::tests::update_feature_flags_disabling_guardian_clears_manual_review_policy_without_history` --- codex-rs/tui/src/lib.rs | 1 + codex-rs/tui/src/markdown_render.rs | 3 +- codex-rs/tui/src/markdown_render_tests.rs | 88 +++++++++++++++++++ codex-rs/tui/src/markdown_text_merge.rs | 50 +++++++++++ ...l_with_tilde_keeps_complete_hyperlink.snap | 15 ++++ 5 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 codex-rs/tui/src/markdown_text_merge.rs create mode 100644 codex-rs/tui/src/snapshots/codex_tui__markdown_render__markdown_render_tests__bare_url_with_tilde_keeps_complete_hyperlink.snap diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 5e6b6cf81..6b98bee19 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -153,6 +153,7 @@ mod local_chatgpt_auth; mod markdown; mod markdown_render; mod markdown_stream; +mod markdown_text_merge; mod mention_codec; mod model_catalog; mod model_migration; diff --git a/codex-rs/tui/src/markdown_render.rs b/codex-rs/tui/src/markdown_render.rs index 9847c6dc3..52b603bce 100644 --- a/codex-rs/tui/src/markdown_render.rs +++ b/codex-rs/tui/src/markdown_render.rs @@ -39,6 +39,7 @@ //! body rows, or even 3-char-wide columns cannot fit, body rows render as //! key/value records. +use crate::markdown_text_merge::DecodedTextMerge; use crate::render::highlight::foreground_style_for_scopes; use crate::render::highlight::highlight_code_to_lines; use crate::render::line_utils::line_to_static; @@ -322,7 +323,7 @@ pub(crate) fn render_markdown_lines_with_width_and_cwd( let mut options = Options::empty(); options.insert(Options::ENABLE_STRIKETHROUGH); options.insert(Options::ENABLE_TABLES); - let parser = Parser::new_ext(input, options).into_offset_iter(); + let parser = DecodedTextMerge::new(Parser::new_ext(input, options).into_offset_iter()); let mut w = Writer::new(input, parser, width, cwd); w.run(); w.text diff --git a/codex-rs/tui/src/markdown_render_tests.rs b/codex-rs/tui/src/markdown_render_tests.rs index 206bae1b7..e0e498e69 100644 --- a/codex-rs/tui/src/markdown_render_tests.rs +++ b/codex-rs/tui/src/markdown_render_tests.rs @@ -8,9 +8,11 @@ use std::path::Path; use crate::markdown_render::COLON_LOCATION_SUFFIX_RE; use crate::markdown_render::HASH_LOCATION_SUFFIX_RE; +use crate::markdown_render::render_markdown_lines_with_width_and_cwd; use crate::markdown_render::render_markdown_text; use crate::markdown_render::render_markdown_text_with_width; use crate::markdown_render::render_markdown_text_with_width_and_cwd; +use insta::assert_debug_snapshot; use insta::assert_snapshot; fn render_markdown_text_for_cwd(input: &str, cwd: &Path) -> Text<'static> { @@ -29,6 +31,92 @@ fn plain_lines(text: &Text<'_>) -> Vec { .collect() } +#[test] +fn bare_url_with_tilde_keeps_complete_hyperlink() { + let destination = + "https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf"; + let lines = render_markdown_lines_with_width_and_cwd( + destination, + /*width*/ Some(80), + /*cwd*/ None, + ); + let rendered = lines + .iter() + .map(|line| { + let text = line + .line + .spans + .iter() + .map(|span| span.content.as_ref()) + .collect::(); + let hyperlinks = line + .hyperlinks + .iter() + .map(|link| (link.columns.clone(), link.destination.as_str())) + .collect::>(); + (text, hyperlinks) + }) + .collect::>(); + + assert_debug_snapshot!(rendered); +} + +#[test] +fn table_url_with_tilde_keeps_complete_hyperlink() { + let destination = + "https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf"; + let markdown = format!("| URL |\n| --- |\n| {destination} |\n"); + let lines = render_markdown_lines_with_width_and_cwd( + &markdown, + /*width*/ Some(32), + /*cwd*/ None, + ); + let destinations = lines + .iter() + .flat_map(|line| line.hyperlinks.iter()) + .map(|link| link.destination.as_str()) + .collect::>(); + + assert!(!destinations.is_empty()); + assert_eq!(destinations, vec![destination; destinations.len()]); +} + +#[test] +fn merged_text_events_preserve_entity_decoding() { + let source = "https://example.com/a&b~c"; + let destination = "https://example.com/a&b~c"; + let lines = render_markdown_lines_with_width_and_cwd( + source, + /*width*/ Some(80), + /*cwd*/ None, + ); + let rendered = lines + .iter() + .map(|line| { + let text = line + .line + .spans + .iter() + .map(|span| span.content.as_ref()) + .collect::(); + let hyperlinks = line + .hyperlinks + .iter() + .map(|link| (link.columns.clone(), link.destination.as_str())) + .collect::>(); + (text, hyperlinks) + }) + .collect::>(); + + assert_eq!( + rendered, + vec![( + destination.to_string(), + vec![(0..destination.len(), destination)], + )], + ); +} + #[test] fn empty() { assert_eq!(render_markdown_text(""), Text::default()); diff --git a/codex-rs/tui/src/markdown_text_merge.rs b/codex-rs/tui/src/markdown_text_merge.rs new file mode 100644 index 000000000..bad46fe5d --- /dev/null +++ b/codex-rs/tui/src/markdown_text_merge.rs @@ -0,0 +1,50 @@ +//! Markdown text-event merging that preserves parser-decoded contents and source offsets. + +use std::iter::Peekable; +use std::ops::Range; + +use pulldown_cmark::Event; + +/// Merges adjacent parsed text events without reconstructing them from the Markdown source. +/// +/// Markdown extensions can split visually contiguous text around delimiter characters. Keeping the +/// decoded event contents together lets downstream consumers recognize tokens that span those +/// parser boundaries while the combined source range remains available for offset-aware rendering. +pub(crate) struct DecodedTextMerge { + iter: Peekable, +} + +impl DecodedTextMerge { + pub(crate) fn new(iter: I) -> Self { + Self { + iter: iter.peekable(), + } + } +} + +impl<'a, I> Iterator for DecodedTextMerge +where + I: Iterator, Range)>, +{ + type Item = (Event<'a>, Range); + + fn next(&mut self) -> Option { + let (event, mut range) = self.iter.next()?; + let Event::Text(text) = event else { + return Some((event, range)); + }; + if !matches!(self.iter.peek(), Some((Event::Text(_), _))) { + return Some((Event::Text(text), range)); + } + + let mut merged = text.into_string(); + while matches!(self.iter.peek(), Some((Event::Text(_), _))) { + let Some((Event::Text(text), next_range)) = self.iter.next() else { + break; + }; + merged.push_str(&text); + range.end = next_range.end; + } + Some((Event::Text(merged.into()), range)) + } +} diff --git a/codex-rs/tui/src/snapshots/codex_tui__markdown_render__markdown_render_tests__bare_url_with_tilde_keeps_complete_hyperlink.snap b/codex-rs/tui/src/snapshots/codex_tui__markdown_render__markdown_render_tests__bare_url_with_tilde_keeps_complete_hyperlink.snap new file mode 100644 index 000000000..c10dccd9c --- /dev/null +++ b/codex-rs/tui/src/snapshots/codex_tui__markdown_render__markdown_render_tests__bare_url_with_tilde_keeps_complete_hyperlink.snap @@ -0,0 +1,15 @@ +--- +source: tui/src/markdown_render_tests.rs +expression: rendered +--- +[ + ( + "https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf", + [ + ( + 0..72, + "https://www.cs.tufts.edu/~nr/cs257/archive/olin-shivers/dissertation.pdf", + ), + ], + ), +]