fix(chat): 解析 XML 的 cdn 链接时进行反转义

- 新增 _normalize_xml_url 统一处理 &amp; 等转义字符
- 图片/视频/表情消息的 cdn URL 解析统一规范化，避免误判为 file_id
2026-02-19 14:20:51 +08:00 · 2025-12-26 21:45:40 +08:00
parent 33be55bcca
commit 761648f15a
2 changed files with 20 additions and 2 deletions
--- a/src/wechat_decrypt_tool/chat_helpers.py
+++ b/src/wechat_decrypt_tool/chat_helpers.py
@@ -530,6 +530,17 @@ def _strip_cdata(s: str) -> str:
    return out.strip()
 def _normalize_xml_url(url: str) -> str:
    """Clean up a URL (or id-like value) pulled out of an XML attribute or tag.

    The value is coerced to ``str``, surrounding whitespace is trimmed, and
    HTML/XML character references such as '&amp;' are decoded. Falsy or
    whitespace-only input yields an empty string.
    """
    raw = str(url or "").strip()
    if raw:
        try:
            decoded = html.unescape(raw)
        except Exception:
            # Best-effort fallback: decode only the ampersand entity.
            decoded = raw.replace("&amp;", "&")
        # Decoding may expose new leading/trailing whitespace, so trim again.
        return decoded.strip()
    return ""
 def _extract_xml_tag_text(xml_text: str, tag: str) -> str:
    if not xml_text or not tag:
        return ""
--- a/src/wechat_decrypt_tool/routers/chat.py
+++ b/src/wechat_decrypt_tool/routers/chat.py
@@ -33,6 +33,7 @@ from ..chat_helpers import (
    _load_contact_rows,
    _load_latest_message_previews,
    _lookup_resource_md5,
    _normalize_xml_url,
    _parse_app_message,
    _parse_pat_message,
    _pick_avatar_url,
@@ -481,8 +482,10 @@ def _append_full_messages_from_rows(
                or _extract_xml_tag_text(raw_text, "cdnmidimgurl")
                or _extract_xml_tag_text(raw_text, "cdnbigimgurl")
            )
-            _cdn_url_or_id = str(_cdn_url_or_id or "").strip()
+            _cdn_url_or_id = _normalize_xml_url(_cdn_url_or_id)
-            image_url = _cdn_url_or_id if _cdn_url_or_id.startswith(("http://", "https://")) else ""
+            image_url = (
                _cdn_url_or_id if str(_cdn_url_or_id).lower().startswith(("http://", "https://")) else ""
            )
            if (not image_url) and _cdn_url_or_id:
                image_file_id = _cdn_url_or_id
@@ -512,6 +515,9 @@ def _append_full_messages_from_rows(
                raw_text, "cdnvideourl"
            )
            video_thumb_url_or_id = _normalize_xml_url(video_thumb_url_or_id)
            video_url_or_id = _normalize_xml_url(video_url_or_id)
            video_thumb_url = (
                video_thumb_url_or_id
                if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://"))
@@ -542,6 +548,7 @@ def _append_full_messages_from_rows(
            emoji_url = _extract_xml_attr(raw_text, "cdnurl")
            if not emoji_url:
                emoji_url = _extract_xml_tag_text(raw_text, "cdn_url")
            emoji_url = _normalize_xml_url(emoji_url)
            if (not emoji_md5) and resource_conn is not None:
                emoji_md5 = _lookup_resource_md5(
                    resource_conn,