Fix HTML export URL sanitization

This commit is contained in:
Mario Zechner
2026-06-02 16:30:14 +02:00
Unverified
parent 7e72ca47c8
commit 6cb23f9b5d
3 changed files with 30 additions and 11 deletions
+1
View File
@@ -10,6 +10,7 @@
### Fixed
- Fixed stored XSS in HTML session exports by sanitizing Markdown link and image URLs with a scheme allow-list after stripping control characters.
- Fixed SDK embedding in bundled Node apps failing with `ENOENT` when `package.json` is not present next to the bundle entrypoint. The package metadata reader now gracefully handles missing `package.json` by using defaults, enabling `createAgentSession()` without requiring package-adjacent files at runtime ([#5226](https://github.com/earendil-works/pi/issues/5226)).
- Fixed HTTP timeout setting not being respected for non-Codex providers (e.g., llama.cpp via OpenAI-compatible API). The `httpIdleTimeoutMs` setting (set via `/settings` HTTP timeout) now applies as the default SDK request timeout for all providers that support it, not just OpenAI Codex Responses. Disabling the timeout (HTTP timeout = false) now correctly disables SDK timeouts for all supported providers by sending a maximum int32 value (effectively infinite) instead of 0, since SDKs treat timeout=0 as an immediate timeout ([#5294](https://github.com/earendil-works/pi/issues/5294)).
- Fixed opening and listing very large JSONL session files by reading session entries line-by-line instead of materializing the full file as one string ([#5231](https://github.com/earendil-works/pi/issues/5231)).
@@ -613,6 +613,18 @@
.replace(/'/g, ''');
}
/**
 * Sanitize a Markdown link or image URL with a scheme allow-list.
 *
 * @param {*} value - Raw URL taken from a Markdown token; falsy values are treated as ''.
 * @returns {string|null} The cleaned URL (possibly ''), or null when the URL
 *   has an explicit scheme other than http, https, mailto, tel or ftp.
 *   URLs without a scheme (relative paths, fragments) are returned as-is.
 */
function sanitizeMarkdownUrl(value) {
  // Strip control characters BEFORE trimming. Browsers drop leading C0
  // controls and spaces when parsing a URL, so an input such as
  // "\x01 javascript:..." must not leave a leading space behind: that space
  // would hide the scheme from the check below while the browser still
  // resolves the emitted attribute to javascript:.
  const href = String(value || '').replace(/[\x00-\x1f\x7f]/g, '').trim();
  if (!href) return href;

  // Only an explicit scheme is validated; scheme-less URLs pass through.
  const scheme = href.match(/^([A-Za-z][A-Za-z0-9+.-]*):/);
  if (scheme && !/^(https?|mailto|tel|ftp)$/i.test(scheme[1])) {
    return null;
  }
  return href;
}
/**
* Truncate string to maxLen chars, append "..." if truncated.
*/
@@ -1569,10 +1581,11 @@
}
},
renderer: {
// Sanitize link URLs to prevent javascript:/vbscript:/data: XSS
// Sanitize link URLs with a scheme allow-list. Browsers strip C0
// controls from schemes, so strip them before checking and emitting.
link(token) {
const href = (token.href || '').trim();
if (/^\s*(javascript|vbscript|data):/i.test(href)) {
const href = sanitizeMarkdownUrl(token.href);
if (href === null) {
return this.parser.parseInline(token.tokens);
}
let out = '<a href="' + escapeHtml(href) + '"';
@@ -1582,10 +1595,10 @@
out += '>' + this.parser.parseInline(token.tokens) + '</a>';
return out;
},
// Sanitize image src URLs
// Sanitize image src URLs with the same scheme allow-list.
image(token) {
const href = (token.href || '').trim();
if (/^\s*(javascript|vbscript|data):/i.test(href)) {
const href = sanitizeMarkdownUrl(token.href);
if (href === null) {
return escapeHtml(token.text || '');
}
let out = '<img src="' + escapeHtml(href) + '" alt="' + escapeHtml(token.text || '') + '"';
@@ -4,15 +4,20 @@ import { describe, expect, it } from "vitest";
describe("export HTML markdown link sanitization", () => {
const templateJs = readFileSync(new URL("../src/core/export-html/template.js", import.meta.url), "utf-8");
it("overrides the marked link renderer to block javascript: protocol", () => {
// The custom link renderer must check for dangerous protocols
it("overrides the marked link renderer to use scheme allow-list sanitization", () => {
expect(templateJs).toMatch(/link\s*\(\s*token\s*\)/);
expect(templateJs).toMatch(/javascript/i);
expect(templateJs).toMatch(/vbscript/i);
expect(templateJs).toMatch(/sanitizeMarkdownUrl\(token\.href\)/);
expect(templateJs).toMatch(/\^\(https\?\|mailto\|tel\|ftp\)/);
});
it("overrides the marked image renderer to block javascript: protocol", () => {
it("overrides the marked image renderer to use scheme allow-list sanitization", () => {
expect(templateJs).toMatch(/image\s*\(\s*token\s*\)/);
expect(templateJs).toMatch(/sanitizeMarkdownUrl\(token\.href\)/);
});
it("strips C0 controls before checking and emitting markdown URLs", () => {
expect(templateJs).toContain("replace(/[\\x00-\\x1f\\x7f]/g, '')");
expect(templateJs).not.toMatch(/\^\\s\*\(javascript\|vbscript\|data\):/i);
});
it("escapes href attributes in the custom link renderer", () => {