fix(chat): 合并转发记录的图片/表情解析与媒体兜底

- 前端：recordItem XML 清理非法控制字符并转义裸 &，补齐 fromnewmsgid/cdnurlstring/aeskey 等字段
- 前端：图片接口追加 server_id；表情接口透传 emoji_url/aes_key
- 后端：/api/chat/media/image 支持 server_id，从 message_resource.db 反查 packed_info 资源 md5 并优先定位
- 后端：/api/chat/media/emoji 支持 emoji_url/aes_key，本地缺失时安全下载并尝试 AES-CBC 解密识别
2026-02-03 06:20:51 +08:00 · 2026-01-02 22:09:17 +08:00
parent 13d9e7a3bd
commit 03af287aea
2 changed files with 139 additions and 11 deletions
--- a/frontend/pages/chat/[[username]].vue
+++ b/frontend/pages/chat/[[username]].vue
@@ -3865,7 +3865,12 @@ const parseChatHistoryRecord = (recordItemXml) => {
  const xml = String(recordItemXml || '').trim()
  if (!xml) return { info: null, items: [] }

-  const normalized = xml.replace(/&#x20;/g, ' ')
+  const normalized = xml
+    .replace(/&#x20;/g, ' ')
+    // Strip control characters that are illegal in XML 1.0 (common in some recordItem payloads)
+    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, '')
+    // Escape stray ampersands (URLs sometimes contain raw '&' instead of '&amp;')
+    .replace(/&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[\da-fA-F]+;)/g, '&amp;')
  let doc
  try {
    doc = new DOMParser().parseFromString(normalized, 'text/xml')
@@ -3906,6 +3911,13 @@ const parseChatHistoryRecord = (recordItemXml) => {
    const fullmd5 = getText(node, 'fullmd5')
    const thumbfullmd5 = getText(node, 'thumbfullmd5')
    const md5 = getText(node, 'md5') || getText(node, 'emoticonmd5') || getText(node, 'emojiMd5')
+    const fromnewmsgid = getText(node, 'fromnewmsgid')
+    const srcMsgLocalid = getText(node, 'srcMsgLocalid')
+    const srcMsgCreateTime = getText(node, 'srcMsgCreateTime')
+    const cdnurlstring = normalizeChatHistoryUrl(getText(node, 'cdnurlstring'))
+    const encrypturlstring = normalizeChatHistoryUrl(getText(node, 'encrypturlstring'))
+    const externurl = normalizeChatHistoryUrl(getText(node, 'externurl'))
+    const aeskey = getText(node, 'aeskey')

    let content = datatitle || datadesc
    if (!content) {
@@ -3948,6 +3960,13 @@ const parseChatHistoryRecord = (recordItemXml) => {
      fullmd5,
      thumbfullmd5,
      md5,
+      fromnewmsgid,
+      srcMsgLocalid,
+      srcMsgCreateTime,
+      cdnurlstring,
+      encrypturlstring,
+      externurl,
+      aeskey,
      renderType,
      content
    }
@@ -3998,15 +4017,23 @@ const normalizeChatHistoryRecordItem = (rec) => {
    if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[视频]'
  } else if (out.renderType === 'emoji') {
    out.emojiMd5 = pickFirstMd5(out.md5, out.fullmd5, out.thumbfullmd5)
+    const remoteEmojiUrl = String(out.cdnurlstring || out.externurl || out.encrypturlstring || '').trim()
+    const remoteAesKey = String(out.aeskey || '').trim()
+    out.emojiRemoteUrl = remoteEmojiUrl
    out.emojiUrl = out.emojiMd5
-      ? `${mediaBase}/api/chat/media/emoji?account=${account}&md5=${encodeURIComponent(out.emojiMd5)}&username=${username}`
+      ? `${mediaBase}/api/chat/media/emoji?account=${account}&md5=${encodeURIComponent(out.emojiMd5)}&username=${username}${remoteEmojiUrl ? `&emoji_url=${encodeURIComponent(remoteEmojiUrl)}` : ''}${remoteAesKey ? `&aes_key=${encodeURIComponent(remoteAesKey)}` : ''}`
      : ''
    if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[表情]'
  } else if (out.renderType === 'image') {
    out.imageMd5 = pickFirstMd5(out.fullmd5, out.thumbfullmd5, out.md5)
-    out.imageUrl = out.imageMd5
-      ? `${mediaBase}/api/chat/media/image?account=${account}&md5=${encodeURIComponent(out.imageMd5)}&username=${username}`
-      : ''
+    const srcServerId = String(out.fromnewmsgid || '').trim()
+    const imgParts = [
+      `account=${account}`,
+      out.imageMd5 ? `md5=${encodeURIComponent(out.imageMd5)}` : '',
+      srcServerId ? `server_id=${encodeURIComponent(srcServerId)}` : '',
+      `username=${username}`
+    ].filter(Boolean)
+    out.imageUrl = imgParts.length ? `${mediaBase}/api/chat/media/image?${imgParts.join('&')}` : ''
    if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[图片]'
  }

--- a/src/wechat_decrypt_tool/routers/chat_media.py
+++ b/src/wechat_decrypt_tool/routers/chat_media.py
@@ -1,6 +1,7 @@
 import asyncio
 from functools import lru_cache
 import hashlib
+import html
 import ipaddress
 import mimetypes
 import os
@@ -18,18 +19,20 @@ from pydantic import BaseModel, Field
 from ..logging_config import get_logger
 from ..media_helpers import (
    _convert_silk_to_wav,
+    _decrypt_emoticon_aes_cbc,
    _detect_image_extension,
    _detect_image_media_type,
-    _is_probably_valid_image,
-    _iter_media_source_candidates,
-    _order_media_candidates,
+    _download_http_bytes,
    _ensure_decrypted_resource_for_md5,
    _fallback_search_media_by_file_id,
    _fallback_search_media_by_md5,
    _get_decrypted_resource_path,
    _get_resource_dir,
    _guess_media_type_by_path,
+    _is_probably_valid_image,
    _iter_emoji_source_candidates,
+    _iter_media_source_candidates,
+    _order_media_candidates,
    _read_and_maybe_decrypt_media,
    _resolve_account_db_storage_dir,
    _resolve_account_dir,
@@ -40,6 +43,7 @@ from ..media_helpers import (
    _try_find_decrypted_resource,
    _try_strip_media_prefix,
 )
+from ..chat_helpers import _extract_md5_from_packed_info
 from ..path_fix import PathFixRoute

 logger = get_logger(__name__)
@@ -300,6 +304,51 @@ def _is_valid_md5(s: str) -> bool:
    return bool(re.fullmatch(r"[0-9a-f]{32}", v))


+@lru_cache(maxsize=4096)  # NOTE(review): "" results (missing db, no row, errors) are memoized too
+def _lookup_resource_md5_by_server_id(account_dir_str: str, server_id: int, want_local_type: int = 0) -> str:
+    """Resolve on-disk resource md5 from message_resource.db by message_svr_id.
+
+    WeChat 4.x often stores media on disk using an md5 derived from `packed_info` rather than
+    the `fullmd5/thumbfullmd5` values found in message XML (including merged-forward records).
+    """
+    account_dir_str = str(account_dir_str or "").strip()
+    if not account_dir_str:
+        return ""
+    try:
+        sid = int(server_id or 0)  # None and 0 both collapse to 0, treated as "no id" below
+    except Exception:  # a non-numeric server_id is handled like a missing one
+        sid = 0
+    if not sid:
+        return ""
+
+    account_dir = Path(account_dir_str)
+    db_path = account_dir / "message_resource.db"
+    if not db_path.exists():  # no resource db under this account dir -> nothing to resolve
+        return ""
+
+    conn = sqlite3.connect(str(db_path))
+    try:
+        row = conn.execute(
+            "SELECT message_local_type, packed_info FROM MessageResourceInfo "
+            "WHERE message_svr_id = ? ORDER BY message_create_time DESC LIMIT 1",
+            (sid,),  # bound as a SQL parameter
+        ).fetchone()  # at most one row: the match with the greatest message_create_time
+        if not row:
+            return ""
+        if want_local_type and int(row[0] or 0) != int(want_local_type):  # 0 disables the type filter
+            return ""
+        md5 = _extract_md5_from_packed_info(row[1])
+        md5 = str(md5 or "").strip().lower()
+        return md5 if _is_valid_md5(md5) else ""  # only return values accepted by _is_valid_md5
+    except Exception:  # best-effort lookup: any failure inside this try is reported as ""
+        return ""
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass  # a failing close() must not mask the result already chosen above
+
+
 def _is_safe_http_url(url: str) -> bool:
    u = str(url or "").strip()
    if not u:
@@ -459,12 +508,13 @@ async def download_chat_emoji(req: EmojiDownloadRequest):
 async def get_chat_image(
    md5: Optional[str] = None,
    file_id: Optional[str] = None,
+    server_id: Optional[int] = None,
    account: Optional[str] = None,
    username: Optional[str] = None,
    deep_scan: bool = False,
 ):
-    if (not md5) and (not file_id):
-        raise HTTPException(status_code=400, detail="Missing md5/file_id.")
+    if (not md5) and (not file_id) and (not server_id):
+        raise HTTPException(status_code=400, detail="Missing md5/file_id/server_id.")

    # Some WeChat versions put non-MD5 identifiers in the "md5" field; treat them as file_id.
    if md5 and (not file_id) and (not _is_valid_md5(str(md5))):
@@ -472,6 +522,13 @@ async def get_chat_image(
        md5 = None
    account_dir = _resolve_account_dir(account)

+    # Prefer resource md5 derived from message_resource.db for chat history / app messages.
+    # This matches how regular image messages are resolved elsewhere in the codebase.
+    if server_id:
+        resource_md5 = _lookup_resource_md5_by_server_id(str(account_dir), int(server_id), want_local_type=3)
+        if resource_md5:
+            md5 = resource_md5
+
    # md5 模式：优先从解密资源目录读取（更快）
    if md5:
        decrypted_path = _try_find_decrypted_resource(account_dir, str(md5).lower())
@@ -620,7 +677,13 @@ async def get_chat_image(


@router.get("/api/chat/media/emoji", summary="获取表情消息资源")
-async def get_chat_emoji(md5: str, account: Optional[str] = None, username: Optional[str] = None):
+async def get_chat_emoji(
+    md5: str,
+    account: Optional[str] = None,
+    username: Optional[str] = None,
+    emoji_url: Optional[str] = None,
+    aes_key: Optional[str] = None,
+):
    if not md5:
        raise HTTPException(status_code=400, detail="Missing md5.")
    account_dir = _resolve_account_dir(account)
@@ -652,6 +715,44 @@ async def get_chat_emoji(md5: str, account: Optional[str] = None, username: Opti
        if data2 is not None and mt2:
            data, media_type = data2, mt2

+    if media_type == "application/octet-stream" and emoji_url:
+        # Some merged-forward records include CDN URLs and AES keys inside recordItem, but the md5
+        # is missing from emoticon.db; allow the client to provide a safe remote URL as fallback.
+        url = html.unescape(str(emoji_url or "")).strip()
+        if url:
+            try:
+                payload = _download_http_bytes(url)
+            except Exception:
+                payload = b""
+
+            candidates: list[bytes] = [payload] if payload else []
+            dec = _decrypt_emoticon_aes_cbc(payload, str(aes_key or "").strip()) if payload and aes_key else None
+            if dec is not None:
+                candidates.insert(0, dec)
+
+            for blob in candidates:
+                if not blob:
+                    continue
+                try:
+                    data2, mt = _try_strip_media_prefix(blob)
+                except Exception:
+                    data2, mt = blob, "application/octet-stream"
+
+                if mt == "application/octet-stream":
+                    mt = _detect_image_media_type(data2[:32])
+                if mt == "application/octet-stream":
+                    try:
+                        if len(data2) >= 8 and data2[4:8] == b"ftyp":
+                            mt = "video/mp4"
+                    except Exception:
+                        pass
+
+                if mt.startswith("image/") and (not _is_probably_valid_image(data2, mt)):
+                    continue
+                if mt != "application/octet-stream":
+                    data, media_type = data2, mt
+                    break
+
    if (not p) and media_type == "application/octet-stream":
        raise HTTPException(status_code=404, detail="Emoji not found.")