fix(chat): 解析 XML 的 cdn 链接时进行反转义

- 新增 _normalize_xml_url 统一处理 &amp; 等转义字符
- 图片/视频/表情消息的 cdn URL 解析统一规范化,避免误判为 file_id
This commit is contained in:
2977094657
2025-12-26 21:45:40 +08:00
parent 33be55bcca
commit 761648f15a
2 changed files with 20 additions and 2 deletions

View File

@@ -530,6 +530,17 @@ def _strip_cdata(s: str) -> str:
return out.strip()
def _normalize_xml_url(url: str) -> str:
"""Normalize URLs extracted from XML attributes/tags (e.g. decode '&')."""
u = str(url or "").strip()
if not u:
return ""
try:
return html.unescape(u).strip()
except Exception:
return u.replace("&", "&").strip()
def _extract_xml_tag_text(xml_text: str, tag: str) -> str:
if not xml_text or not tag:
return ""

View File

@@ -33,6 +33,7 @@ from ..chat_helpers import (
_load_contact_rows,
_load_latest_message_previews,
_lookup_resource_md5,
_normalize_xml_url,
_parse_app_message,
_parse_pat_message,
_pick_avatar_url,
@@ -481,8 +482,10 @@ def _append_full_messages_from_rows(
or _extract_xml_tag_text(raw_text, "cdnmidimgurl")
or _extract_xml_tag_text(raw_text, "cdnbigimgurl")
)
_cdn_url_or_id = str(_cdn_url_or_id or "").strip()
image_url = _cdn_url_or_id if _cdn_url_or_id.startswith(("http://", "https://")) else ""
_cdn_url_or_id = _normalize_xml_url(_cdn_url_or_id)
image_url = (
_cdn_url_or_id if str(_cdn_url_or_id).lower().startswith(("http://", "https://")) else ""
)
if (not image_url) and _cdn_url_or_id:
image_file_id = _cdn_url_or_id
@@ -512,6 +515,9 @@ def _append_full_messages_from_rows(
raw_text, "cdnvideourl"
)
video_thumb_url_or_id = _normalize_xml_url(video_thumb_url_or_id)
video_url_or_id = _normalize_xml_url(video_url_or_id)
video_thumb_url = (
video_thumb_url_or_id
if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://"))
@@ -542,6 +548,7 @@ def _append_full_messages_from_rows(
emoji_url = _extract_xml_attr(raw_text, "cdnurl")
if not emoji_url:
emoji_url = _extract_xml_tag_text(raw_text, "cdn_url")
emoji_url = _normalize_xml_url(emoji_url)
if (not emoji_md5) and resource_conn is not None:
emoji_md5 = _lookup_resource_md5(
resource_conn,