mirror of
https://github.com/LifeArchiveProject/WeChatDataAnalysis.git
synced 2026-02-18 13:50:49 +08:00
fix(chat): 合并转发记录的图片/表情解析与媒体兜底
- 前端:recordItem XML 清理非法控制字符并转义裸 &,补齐 fromnewmsgid/cdnurlstring/aeskey 等字段 - 前端:图片接口追加 server_id;表情接口透传 emoji_url/aes_key - 后端:/api/chat/media/image 支持 server_id,从 message_resource.db 反查 packed_info 资源 md5 并优先定位 - 后端:/api/chat/media/emoji 支持 emoji_url/aes_key,本地缺失时安全下载并尝试 AES-CBC 解密识别
This commit is contained in:
@@ -3865,7 +3865,12 @@ const parseChatHistoryRecord = (recordItemXml) => {
|
|||||||
const xml = String(recordItemXml || '').trim()
|
const xml = String(recordItemXml || '').trim()
|
||||||
if (!xml) return { info: null, items: [] }
|
if (!xml) return { info: null, items: [] }
|
||||||
|
|
||||||
const normalized = xml.replace(/ /g, ' ')
|
const normalized = xml
|
||||||
|
.replace(/ /g, ' ')
|
||||||
|
// Strip control characters that are illegal in XML 1.0 (common in some recordItem payloads)
|
||||||
|
.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, '')
|
||||||
|
// Escape stray ampersands (URLs sometimes contain raw '&' instead of '&amp;')
|
||||||
|
.replace(/&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[\da-fA-F]+;)/g, '&')
|
||||||
let doc
|
let doc
|
||||||
try {
|
try {
|
||||||
doc = new DOMParser().parseFromString(normalized, 'text/xml')
|
doc = new DOMParser().parseFromString(normalized, 'text/xml')
|
||||||
@@ -3906,6 +3911,13 @@ const parseChatHistoryRecord = (recordItemXml) => {
|
|||||||
const fullmd5 = getText(node, 'fullmd5')
|
const fullmd5 = getText(node, 'fullmd5')
|
||||||
const thumbfullmd5 = getText(node, 'thumbfullmd5')
|
const thumbfullmd5 = getText(node, 'thumbfullmd5')
|
||||||
const md5 = getText(node, 'md5') || getText(node, 'emoticonmd5') || getText(node, 'emojiMd5')
|
const md5 = getText(node, 'md5') || getText(node, 'emoticonmd5') || getText(node, 'emojiMd5')
|
||||||
|
const fromnewmsgid = getText(node, 'fromnewmsgid')
|
||||||
|
const srcMsgLocalid = getText(node, 'srcMsgLocalid')
|
||||||
|
const srcMsgCreateTime = getText(node, 'srcMsgCreateTime')
|
||||||
|
const cdnurlstring = normalizeChatHistoryUrl(getText(node, 'cdnurlstring'))
|
||||||
|
const encrypturlstring = normalizeChatHistoryUrl(getText(node, 'encrypturlstring'))
|
||||||
|
const externurl = normalizeChatHistoryUrl(getText(node, 'externurl'))
|
||||||
|
const aeskey = getText(node, 'aeskey')
|
||||||
|
|
||||||
let content = datatitle || datadesc
|
let content = datatitle || datadesc
|
||||||
if (!content) {
|
if (!content) {
|
||||||
@@ -3948,6 +3960,13 @@ const parseChatHistoryRecord = (recordItemXml) => {
|
|||||||
fullmd5,
|
fullmd5,
|
||||||
thumbfullmd5,
|
thumbfullmd5,
|
||||||
md5,
|
md5,
|
||||||
|
fromnewmsgid,
|
||||||
|
srcMsgLocalid,
|
||||||
|
srcMsgCreateTime,
|
||||||
|
cdnurlstring,
|
||||||
|
encrypturlstring,
|
||||||
|
externurl,
|
||||||
|
aeskey,
|
||||||
renderType,
|
renderType,
|
||||||
content
|
content
|
||||||
}
|
}
|
||||||
@@ -3998,15 +4017,23 @@ const normalizeChatHistoryRecordItem = (rec) => {
|
|||||||
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[视频]'
|
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[视频]'
|
||||||
} else if (out.renderType === 'emoji') {
|
} else if (out.renderType === 'emoji') {
|
||||||
out.emojiMd5 = pickFirstMd5(out.md5, out.fullmd5, out.thumbfullmd5)
|
out.emojiMd5 = pickFirstMd5(out.md5, out.fullmd5, out.thumbfullmd5)
|
||||||
|
const remoteEmojiUrl = String(out.cdnurlstring || out.externurl || out.encrypturlstring || '').trim()
|
||||||
|
const remoteAesKey = String(out.aeskey || '').trim()
|
||||||
|
out.emojiRemoteUrl = remoteEmojiUrl
|
||||||
out.emojiUrl = out.emojiMd5
|
out.emojiUrl = out.emojiMd5
|
||||||
? `${mediaBase}/api/chat/media/emoji?account=${account}&md5=${encodeURIComponent(out.emojiMd5)}&username=${username}`
|
? `${mediaBase}/api/chat/media/emoji?account=${account}&md5=${encodeURIComponent(out.emojiMd5)}&username=${username}${remoteEmojiUrl ? `&emoji_url=${encodeURIComponent(remoteEmojiUrl)}` : ''}${remoteAesKey ? `&aes_key=${encodeURIComponent(remoteAesKey)}` : ''}`
|
||||||
: ''
|
: ''
|
||||||
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[表情]'
|
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[表情]'
|
||||||
} else if (out.renderType === 'image') {
|
} else if (out.renderType === 'image') {
|
||||||
out.imageMd5 = pickFirstMd5(out.fullmd5, out.thumbfullmd5, out.md5)
|
out.imageMd5 = pickFirstMd5(out.fullmd5, out.thumbfullmd5, out.md5)
|
||||||
out.imageUrl = out.imageMd5
|
const srcServerId = String(out.fromnewmsgid || '').trim()
|
||||||
? `${mediaBase}/api/chat/media/image?account=${account}&md5=${encodeURIComponent(out.imageMd5)}&username=${username}`
|
const imgParts = [
|
||||||
: ''
|
`account=${account}`,
|
||||||
|
out.imageMd5 ? `md5=${encodeURIComponent(out.imageMd5)}` : '',
|
||||||
|
srcServerId ? `server_id=${encodeURIComponent(srcServerId)}` : '',
|
||||||
|
`username=${username}`
|
||||||
|
].filter(Boolean)
|
||||||
|
out.imageUrl = imgParts.length ? `${mediaBase}/api/chat/media/image?${imgParts.join('&')}` : ''
|
||||||
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[图片]'
|
if (!out.content || /^\[.+\]$/.test(String(out.content || '').trim())) out.content = '[图片]'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import html
|
||||||
import ipaddress
|
import ipaddress
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
@@ -18,18 +19,20 @@ from pydantic import BaseModel, Field
|
|||||||
from ..logging_config import get_logger
|
from ..logging_config import get_logger
|
||||||
from ..media_helpers import (
|
from ..media_helpers import (
|
||||||
_convert_silk_to_wav,
|
_convert_silk_to_wav,
|
||||||
|
_decrypt_emoticon_aes_cbc,
|
||||||
_detect_image_extension,
|
_detect_image_extension,
|
||||||
_detect_image_media_type,
|
_detect_image_media_type,
|
||||||
_is_probably_valid_image,
|
_download_http_bytes,
|
||||||
_iter_media_source_candidates,
|
|
||||||
_order_media_candidates,
|
|
||||||
_ensure_decrypted_resource_for_md5,
|
_ensure_decrypted_resource_for_md5,
|
||||||
_fallback_search_media_by_file_id,
|
_fallback_search_media_by_file_id,
|
||||||
_fallback_search_media_by_md5,
|
_fallback_search_media_by_md5,
|
||||||
_get_decrypted_resource_path,
|
_get_decrypted_resource_path,
|
||||||
_get_resource_dir,
|
_get_resource_dir,
|
||||||
_guess_media_type_by_path,
|
_guess_media_type_by_path,
|
||||||
|
_is_probably_valid_image,
|
||||||
_iter_emoji_source_candidates,
|
_iter_emoji_source_candidates,
|
||||||
|
_iter_media_source_candidates,
|
||||||
|
_order_media_candidates,
|
||||||
_read_and_maybe_decrypt_media,
|
_read_and_maybe_decrypt_media,
|
||||||
_resolve_account_db_storage_dir,
|
_resolve_account_db_storage_dir,
|
||||||
_resolve_account_dir,
|
_resolve_account_dir,
|
||||||
@@ -40,6 +43,7 @@ from ..media_helpers import (
|
|||||||
_try_find_decrypted_resource,
|
_try_find_decrypted_resource,
|
||||||
_try_strip_media_prefix,
|
_try_strip_media_prefix,
|
||||||
)
|
)
|
||||||
|
from ..chat_helpers import _extract_md5_from_packed_info
|
||||||
from ..path_fix import PathFixRoute
|
from ..path_fix import PathFixRoute
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
@@ -300,6 +304,51 @@ def _is_valid_md5(s: str) -> bool:
|
|||||||
return bool(re.fullmatch(r"[0-9a-f]{32}", v))
|
return bool(re.fullmatch(r"[0-9a-f]{32}", v))
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=4096)
|
||||||
|
def _lookup_resource_md5_by_server_id(account_dir_str: str, server_id: int, want_local_type: int = 0) -> str:
|
||||||
|
"""Resolve on-disk resource md5 from message_resource.db by message_svr_id.
|
||||||
|
|
||||||
|
WeChat 4.x often stores media on disk using an md5 derived from `packed_info` rather than
|
||||||
|
the `fullmd5/thumbfullmd5` values found in message XML (including merged-forward records).
|
||||||
|
"""
|
||||||
|
account_dir_str = str(account_dir_str or "").strip()
|
||||||
|
if not account_dir_str:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
sid = int(server_id or 0)
|
||||||
|
except Exception:
|
||||||
|
sid = 0
|
||||||
|
if not sid:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
account_dir = Path(account_dir_str)
|
||||||
|
db_path = account_dir / "message_resource.db"
|
||||||
|
if not db_path.exists():
|
||||||
|
return ""
|
||||||
|
|
||||||
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
try:
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT message_local_type, packed_info FROM MessageResourceInfo "
|
||||||
|
"WHERE message_svr_id = ? ORDER BY message_create_time DESC LIMIT 1",
|
||||||
|
(sid,),
|
||||||
|
).fetchone()
|
||||||
|
if not row:
|
||||||
|
return ""
|
||||||
|
if want_local_type and int(row[0] or 0) != int(want_local_type):
|
||||||
|
return ""
|
||||||
|
md5 = _extract_md5_from_packed_info(row[1])
|
||||||
|
md5 = str(md5 or "").strip().lower()
|
||||||
|
return md5 if _is_valid_md5(md5) else ""
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
conn.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _is_safe_http_url(url: str) -> bool:
|
def _is_safe_http_url(url: str) -> bool:
|
||||||
u = str(url or "").strip()
|
u = str(url or "").strip()
|
||||||
if not u:
|
if not u:
|
||||||
@@ -459,12 +508,13 @@ async def download_chat_emoji(req: EmojiDownloadRequest):
|
|||||||
async def get_chat_image(
|
async def get_chat_image(
|
||||||
md5: Optional[str] = None,
|
md5: Optional[str] = None,
|
||||||
file_id: Optional[str] = None,
|
file_id: Optional[str] = None,
|
||||||
|
server_id: Optional[int] = None,
|
||||||
account: Optional[str] = None,
|
account: Optional[str] = None,
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
deep_scan: bool = False,
|
deep_scan: bool = False,
|
||||||
):
|
):
|
||||||
if (not md5) and (not file_id):
|
if (not md5) and (not file_id) and (not server_id):
|
||||||
raise HTTPException(status_code=400, detail="Missing md5/file_id.")
|
raise HTTPException(status_code=400, detail="Missing md5/file_id/server_id.")
|
||||||
|
|
||||||
# Some WeChat versions put non-MD5 identifiers in the "md5" field; treat them as file_id.
|
# Some WeChat versions put non-MD5 identifiers in the "md5" field; treat them as file_id.
|
||||||
if md5 and (not file_id) and (not _is_valid_md5(str(md5))):
|
if md5 and (not file_id) and (not _is_valid_md5(str(md5))):
|
||||||
@@ -472,6 +522,13 @@ async def get_chat_image(
|
|||||||
md5 = None
|
md5 = None
|
||||||
account_dir = _resolve_account_dir(account)
|
account_dir = _resolve_account_dir(account)
|
||||||
|
|
||||||
|
# Prefer resource md5 derived from message_resource.db for chat history / app messages.
|
||||||
|
# This matches how regular image messages are resolved elsewhere in the codebase.
|
||||||
|
if server_id:
|
||||||
|
resource_md5 = _lookup_resource_md5_by_server_id(str(account_dir), int(server_id), want_local_type=3)
|
||||||
|
if resource_md5:
|
||||||
|
md5 = resource_md5
|
||||||
|
|
||||||
# md5 模式:优先从解密资源目录读取(更快)
|
# md5 模式:优先从解密资源目录读取(更快)
|
||||||
if md5:
|
if md5:
|
||||||
decrypted_path = _try_find_decrypted_resource(account_dir, str(md5).lower())
|
decrypted_path = _try_find_decrypted_resource(account_dir, str(md5).lower())
|
||||||
@@ -620,7 +677,13 @@ async def get_chat_image(
|
|||||||
|
|
||||||
|
|
||||||
@router.get("/api/chat/media/emoji", summary="获取表情消息资源")
|
@router.get("/api/chat/media/emoji", summary="获取表情消息资源")
|
||||||
async def get_chat_emoji(md5: str, account: Optional[str] = None, username: Optional[str] = None):
|
async def get_chat_emoji(
|
||||||
|
md5: str,
|
||||||
|
account: Optional[str] = None,
|
||||||
|
username: Optional[str] = None,
|
||||||
|
emoji_url: Optional[str] = None,
|
||||||
|
aes_key: Optional[str] = None,
|
||||||
|
):
|
||||||
if not md5:
|
if not md5:
|
||||||
raise HTTPException(status_code=400, detail="Missing md5.")
|
raise HTTPException(status_code=400, detail="Missing md5.")
|
||||||
account_dir = _resolve_account_dir(account)
|
account_dir = _resolve_account_dir(account)
|
||||||
@@ -652,6 +715,44 @@ async def get_chat_emoji(md5: str, account: Optional[str] = None, username: Opti
|
|||||||
if data2 is not None and mt2:
|
if data2 is not None and mt2:
|
||||||
data, media_type = data2, mt2
|
data, media_type = data2, mt2
|
||||||
|
|
||||||
|
if media_type == "application/octet-stream" and emoji_url:
|
||||||
|
# Some merged-forward records include CDN URLs and AES keys inside recordItem, but the md5
|
||||||
|
# is missing from emoticon.db; allow the client to provide a safe remote URL as fallback.
|
||||||
|
url = html.unescape(str(emoji_url or "")).strip()
|
||||||
|
if url:
|
||||||
|
try:
|
||||||
|
payload = _download_http_bytes(url)
|
||||||
|
except Exception:
|
||||||
|
payload = b""
|
||||||
|
|
||||||
|
candidates: list[bytes] = [payload] if payload else []
|
||||||
|
dec = _decrypt_emoticon_aes_cbc(payload, str(aes_key or "").strip()) if payload and aes_key else None
|
||||||
|
if dec is not None:
|
||||||
|
candidates.insert(0, dec)
|
||||||
|
|
||||||
|
for blob in candidates:
|
||||||
|
if not blob:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
data2, mt = _try_strip_media_prefix(blob)
|
||||||
|
except Exception:
|
||||||
|
data2, mt = blob, "application/octet-stream"
|
||||||
|
|
||||||
|
if mt == "application/octet-stream":
|
||||||
|
mt = _detect_image_media_type(data2[:32])
|
||||||
|
if mt == "application/octet-stream":
|
||||||
|
try:
|
||||||
|
if len(data2) >= 8 and data2[4:8] == b"ftyp":
|
||||||
|
mt = "video/mp4"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if mt.startswith("image/") and (not _is_probably_valid_image(data2, mt)):
|
||||||
|
continue
|
||||||
|
if mt != "application/octet-stream":
|
||||||
|
data, media_type = data2, mt
|
||||||
|
break
|
||||||
|
|
||||||
if (not p) and media_type == "application/octet-stream":
|
if (not p) and media_type == "application/octet-stream":
|
||||||
raise HTTPException(status_code=404, detail="Emoji not found.")
|
raise HTTPException(status_code=404, detail="Emoji not found.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user