From 2a1ae2150f257dacd5b34fc11d24824f805934f6 Mon Sep 17 00:00:00 2001 From: 2977094657 <2977094657@qq.com> Date: Fri, 13 Feb 2026 22:38:28 +0800 Subject: [PATCH 1/3] =?UTF-8?q?feat(chat-export):=20=E6=94=AF=E6=8C=81=20H?= =?UTF-8?q?TML=20=E5=AF=BC=E5=87=BA=EF=BC=88=E5=90=88=E5=B9=B6=E6=B6=88?= =?UTF-8?q?=E6=81=AF/=E8=BF=9C=E7=A8=8B=E7=BC=A9=E7=95=A5=E5=9B=BE?= =?UTF-8?q?=E5=8F=AF=E9=80=89=E4=B8=8B=E8=BD=BD=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 导出格式新增 html:生成 index.html + 会话 messages.html,离线浏览 - 支持 chatHistory(合并消息)解析/渲染与弹窗查看 - 图片资源解析增强:MessageResourceInfo 优先 + md5/hdmd5 兜底 - HTML 导出可选下载远程缩略图(仅公网主机/图片类型/5MB 限制) - 修复拍一拍误判、公众号封面样式识别;转账过期状态与前端展示 --- frontend/composables/useApi.js | 1 + frontend/pages/chat/[[username]].vue | 71 +- .../public/assets/images/wechat/overdue.png | Bin 0 -> 1580 bytes .../chat_export_service.py | 3063 ++++++++++++++++- src/wechat_decrypt_tool/chat_helpers.py | 51 +- src/wechat_decrypt_tool/routers/chat.py | 86 + .../routers/chat_export.py | 25 +- ...hat_app_message_type4_patmsg_regression.py | 50 + tests/test_chat_export_chat_history_modal.py | 218 ++ tests/test_chat_export_html_format.py | 353 ++ ...hat_export_image_md5_candidate_fallback.py | 199 ++ ...port_image_md5_prefers_message_resource.py | 235 ++ ...est_chat_export_message_types_semantics.py | 1 + tests/test_chat_export_remote_thumb_option.py | 304 ++ .../test_chat_official_article_cover_style.py | 58 + tests/test_transfer_postprocess.py | 63 +- 16 files changed, 4696 insertions(+), 82 deletions(-) create mode 100644 frontend/public/assets/images/wechat/overdue.png create mode 100644 tests/test_chat_app_message_type4_patmsg_regression.py create mode 100644 tests/test_chat_export_chat_history_modal.py create mode 100644 tests/test_chat_export_html_format.py create mode 100644 tests/test_chat_export_image_md5_candidate_fallback.py create mode 100644 tests/test_chat_export_image_md5_prefers_message_resource.py create mode 
100644 tests/test_chat_export_remote_thumb_option.py create mode 100644 tests/test_chat_official_article_cover_style.py diff --git a/frontend/composables/useApi.js b/frontend/composables/useApi.js index c1ce542..f30f7d3 100644 --- a/frontend/composables/useApi.js +++ b/frontend/composables/useApi.js @@ -294,6 +294,7 @@ export const useApi = () => { media_kinds: Array.isArray(data.media_kinds) ? data.media_kinds : ['image', 'emoji', 'video', 'video_thumb', 'voice', 'file'], output_dir: data.output_dir == null ? null : String(data.output_dir || '').trim(), allow_process_key_extract: !!data.allow_process_key_extract, + download_remote_media: !!data.download_remote_media, privacy_mode: !!data.privacy_mode, file_name: data.file_name || null } diff --git a/frontend/pages/chat/[[username]].vue b/frontend/pages/chat/[[username]].vue index 7366a49..6d600f5 100644 --- a/frontend/pages/chat/[[username]].vue +++ b/frontend/pages/chat/[[username]].vue @@ -722,7 +722,7 @@ @click.stop="openChatHistoryModal(message)" >
-
{{ message.title || '聊天记录' }}
+
{{ message.title || '合并消息' }}
- 聊天记录 + 合并消息
+ :class="[{ 'wechat-transfer-received': message.transferReceived, 'wechat-transfer-returned': isTransferReturned(message), 'wechat-transfer-overdue': isTransferOverdue(message) }, message.isSent ? 'wechat-transfer-sent-side' : 'wechat-transfer-received-side']">
+
@@ -1233,7 +1234,7 @@ @click.stop >
-
{{ chatHistoryModalTitle || '聊天记录' }}
+
{{ chatHistoryModalTitle || '合并消息' }}
@@ -1516,6 +1521,19 @@
+
+
HTML 选项
+
+ +
+ 仅 HTML 生效;会在导出时尝试下载远程缩略图并写入 ZIP(已做安全限制)。隐私模式下自动忽略。 +
+
+
+
\n") + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + tw.write('
\n') + + conv_dir_norm = str(conv_dir or "").strip().strip("/").replace("\\", "/") + for item in session_items: + item_conv_dir = str(item.get("convDir") or "").strip().strip("/").replace("\\", "/") + if not item_conv_dir: + continue + + href = f"{rel_root}{item_conv_dir}/messages.html" + item_display_name = str(item.get("displayName") or "").strip() or "会话" + item_avatar_path = str(item.get("avatarPath") or "").strip() + item_avatar_src = rel_path(item_avatar_path) if item_avatar_path else "" + item_last_time = str(item.get("lastTimeText") or "").strip() + item_preview = str(item.get("previewText") or "").strip() + + is_active = False + try: + is_active = (str(item.get("username") or "").strip() == conv_username) or (item_conv_dir == conv_dir_norm) + except Exception: + is_active = item_conv_dir == conv_dir_norm + + safe_char = (item_display_name[:1] or "?").strip() or "?" + classes = ( + "px-3 cursor-pointer transition-colors duration-150 border-b border-gray-100 " + "h-[calc(80px/var(--dpr))] flex items-center" + ) + if is_active: + classes += " bg-[#DEDEDE]" + else: + classes += " hover:bg-[#F5F5F5]" + + item_username = str(item.get("username") or "").strip() + tw.write( + f' \n") + tw.write('
\n') + tw.write( + '
\n' + ) + if item_avatar_src and (not privacy_mode): + tw.write( + f' {esc_attr(item_display_name)}\n' + ) + else: + tw.write( + f'
{esc_text(safe_char)}
\n' + ) + tw.write("
\n") + tw.write("
\n") + tw.write('
\n') + tw.write('
\n') + tw.write( + f'

{esc_text(item_display_name)}

\n' + ) + tw.write('
\n') + tw.write(f' {esc_text(item_last_time)}\n') + tw.write("
\n") + tw.write("
\n") + tw.write( + f'

{render_text_with_emojis(item_preview)}

\n' + ) + tw.write("
\n") + tw.write("
\n") + + tw.write("
\n") + tw.write("\n") + + # Right chat area + tw.write('
\n') + tw.write('
\n') + tw.write('
\n') + tw.write('
\n') + + tw.write('
\n') + tw.write('
\n') + tw.write(f'

{esc_text(chat_title)}

\n') + tw.write("
\n") + tw.write('
\n') + tw.write(f' \n") + tw.write("
\n") + tw.write("
\n") + + tw.write('
\n') + + sender_alias_map: dict[str, int] = {} + prev_ts = 0 + scanned = 0 + for row in _iter_rows_for_conversation( + account_dir=account_dir, + conv_username=conv_username, + start_time=start_time, + end_time=end_time, + local_types=local_types, + ): + scanned += 1 + + msg = _parse_message_for_export( + row=row, + conv_username=conv_username, + is_group=conv_is_group, + resource_conn=resource_conn, + resource_chat_id=resource_chat_id, + sender_alias="", + ) + if not _is_render_type_selected(msg.get("renderType"), want_types): + continue + + sender_username = str(msg.get("senderUsername") or "").strip() + if privacy_mode: + _privacy_scrub_message(msg, conv_is_group=conv_is_group, sender_alias_map=sender_alias_map) + else: + msg["senderDisplayName"] = resolve_display_name(sender_username) if sender_username else "" + msg["senderAvatarPath"] = ( + _materialize_avatar( + zf=zf, + head_image_conn=head_image_conn, + username=sender_username, + avatar_written=avatar_written, + ) + if (sender_username and head_image_conn is not None) + else "" + ) + + if include_media: + _attach_offline_media( + zf=zf, + account_dir=account_dir, + conv_username=conv_username, + msg=msg, + media_written=media_written, + report=report, + media_kinds=media_kinds, + allow_process_key_extract=allow_process_key_extract, + media_db_path=media_db_path, + lock=lock, + job=job, + ) + _remember_offline_media(msg) + + rt = str(msg.get("renderType") or "text").strip() or "text" + create_time_text = str(msg.get("createTimeText") or "").strip() + try: + ts = int(msg.get("createTime") or 0) + except Exception: + ts = 0 + + show_divider = False + if ts and ((prev_ts == 0) or (abs(ts - prev_ts) >= 300)): + show_divider = True + + if show_divider: + divider_text = _format_session_time(ts) + if divider_text: + tw.write('
\n') + tw.write(f'
{esc_text(divider_text)}
\n') + tw.write("
\n") + + # Wrapper (for filter) + tw.write(f'
\n') + + if rt == "system": + tw.write('
\n') + tw.write(f'
{esc_text(msg.get("content") or "")}
\n') + tw.write("
\n") + tw.write("
\n") + exported += 1 + with lock: + job.progress.messages_exported += 1 + job.progress.current_conversation_messages_exported = exported + if ts: + prev_ts = ts + continue + + is_sent = bool(msg.get("isSent")) + row_cls = "wce-msg-row wce-msg-row-sent flex items-center justify-end" if is_sent else "wce-msg-row wce-msg-row-received flex items-center justify-start" + msg_cls = "wce-msg wce-msg-sent flex items-start max-w-md flex-row-reverse" if is_sent else "wce-msg flex items-start max-w-md" + avatar_extra = "wce-avatar-sent ml-3" if is_sent else "wce-avatar-received mr-3" + + tw.write(f'
\n') + tw.write(f'
\n') + + avatar_src = rel_path(str(msg.get("senderAvatarPath") or "").strip()) + display_name = str(msg.get("senderDisplayName") or "").strip() + fallback_char = (display_name or sender_username or "?")[:1] + tw.write(" " + build_avatar_html(src=avatar_src, fallback_text=fallback_char, extra_class=avatar_extra) + "\n") + + align_cls = "items-end" if is_sent else "items-start" + tw.write(f'
\n') + if conv_is_group and (not is_sent) and display_name: + tw.write(f'
{esc_text(display_name)}
\n') + + pos_cls = "right-0" if is_sent else "left-0" + tw.write( + '
{esc_text(create_time_text)}
\n' + ) + + # Message body + bubble_dir_cls = "bg-[#95EC69] text-black bubble-tail-r" if is_sent else "bg-white text-gray-800 bubble-tail-l" + bubble_base_cls = "px-3 py-2 text-sm max-w-sm relative msg-bubble whitespace-pre-wrap break-words leading-relaxed" + bubble_unknown_cls = ( + "px-3 py-2 text-xs max-w-sm relative msg-bubble whitespace-pre-wrap break-words leading-relaxed text-gray-700" + ) + + if rt == "image": + src = offline_path(msg, "image") + if not src: + url = str(msg.get("imageUrl") or "").strip() + src = url if is_http_url(url) else "" + if src: + tw.write('
\n') + tw.write('
\n') + tw.write(f' \n') + tw.write(f' 图片\n') + tw.write(" \n") + tw.write("
\n") + tw.write("
\n") + else: + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + elif rt == "emoji": + src = offline_path(msg, "emoji") + if not src: + url = str(msg.get("emojiUrl") or "").strip() + src = url if is_http_url(url) else "" + if src: + emoji_dir = " flex-row-reverse" if is_sent else "" + tw.write(f'
\n') + tw.write(f' 表情\n') + tw.write("
\n") + else: + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + elif rt == "video": + thumb = offline_path(msg, "video_thumb") + if not thumb: + url = str(msg.get("videoThumbUrl") or "").strip() + thumb = url if is_http_url(url) else "" + video = offline_path(msg, "video") + if not video: + url = str(msg.get("videoUrl") or "").strip() + video = url if is_http_url(url) else "" + if thumb: + tw.write('
\n') + tw.write('
\n') + tw.write(f' 视频\n') + if video: + tw.write(f' \n') + tw.write('
\n') + tw.write(' \n') + tw.write("
\n") + tw.write("
\n") + else: + tw.write('
\n') + tw.write('
\n') + tw.write(' \n') + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + else: + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + elif rt == "voice": + voice = offline_path(msg, "voice") + if voice: + duration_ms = msg.get("voiceLength") + width = get_voice_width(duration_ms) + seconds = get_voice_duration_in_seconds(duration_ms) + voice_dir_cls = "wechat-voice-sent" if is_sent else "wechat-voice-received" + content_dir_cls = " flex-row-reverse" if is_sent else "" + icon_dir_cls = "voice-icon-sent" if is_sent else "voice-icon-received" + voice_id = str(msg.get("id") or "").strip() + + tw.write('
\n') + tw.write( + f'
\n' + ) + tw.write(f'
\n') + tw.write( + f' \n' + ) + tw.write( + ' \n' + ) + tw.write( + ' \n' + ) + tw.write( + ' \n' + ) + tw.write(" \n") + tw.write(f' {esc_text(seconds)}"\n') + tw.write("
\n") + tw.write("
\n") + tw.write(f' \n') + tw.write("
\n") + else: + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + elif rt == "file": + fsrc = offline_path(msg, "file") + title = str(msg.get("title") or msg.get("content") or "文件").strip() + size = str(msg.get("fileSize") or "").strip() + size_text = format_file_size(size) + sent_side_cls = " wechat-special-sent-side" if is_sent else "" + cls = f"wechat-redpacket-card wechat-special-card wechat-file-card msg-radius{sent_side_cls}" + tag = "a" if fsrc else "div" + attrs = f' href="{esc_attr(fsrc)}" download' if fsrc else "" + tw.write(f' <{tag}{attrs} class="{esc_attr(cls)}">\n') + tw.write('
\n') + tw.write('
\n') + tw.write(f' {esc_text(title or "文件")}\n') + if size_text: + tw.write(f' {esc_text(size_text)}\n') + tw.write("
\n") + tw.write(f' \n') + tw.write("
\n") + tw.write('
\n') + tw.write(f' \n') + tw.write(" 微信电脑版\n") + tw.write("
\n") + tw.write(f" \n") + elif rt == "link": + url = str(msg.get("url") or "").strip() + safe_url = url if is_http_url(url) else "" + if safe_url: + heading = str(msg.get("title") or msg.get("content") or safe_url).strip() + abstract = str(msg.get("content") or "").strip() + preview = str(msg.get("thumbUrl") or "").strip() + preview_url = "" + if is_http_url(preview): + local = maybe_download_remote_image(preview) + preview_url = local or preview + variant = str(msg.get("linkStyle") or "").strip().lower() + + from_text = get_link_from_text(msg, url=safe_url) + from_avatar_text = first_glyph(from_text) or "\u200B" + from_text = from_text or "\u200B" + sent_side_cls = " wechat-special-sent-side" if is_sent else "" + + if variant == "cover": + cls = f"wechat-link-card-cover wechat-special-card msg-radius{sent_side_cls}" + tw.write( + f' \n' + ) + if preview_url: + tw.write(' \n") + else: + tw.write(' \n") + tw.write(f' \n') + tw.write(" \n") + else: + cls = f"wechat-link-card wechat-special-card msg-radius{sent_side_cls}" + tw.write( + f' \n' + ) + tw.write(' \n") + tw.write(' \n") + tw.write(" \n") + else: + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + elif rt == "voip": + voip_dir_cls = "wechat-voip-sent" if is_sent else "wechat-voip-received" + content_dir_cls = " flex-row-reverse" if is_sent else "" + voip_type = str(msg.get("voipType") or "").strip().lower() + icon = "wechat-video-light.png" if voip_type == "video" else "wechat-audio-light.png" + tw.write(f'
\n') + tw.write(f'
\n') + tw.write(f' \n') + tw.write(f' {esc_text(msg.get("content") or "通话")}\n') + tw.write("
\n") + tw.write("
\n") + elif rt == "quote": + tw.write( + f'
{render_text_with_emojis(msg.get("content") or "")}
\n' + ) + + qt = str(msg.get("quoteTitle") or "").strip() + qc = str(msg.get("quoteContent") or "").strip() + qthumb = str(msg.get("quoteThumbUrl") or "").strip() + qtype = str(msg.get("quoteType") or "").strip() + qsid_raw = str(msg.get("quoteServerId") or "").strip() + qsid = int(qsid_raw) if qsid_raw.isdigit() else 0 + + def is_quoted_voice() -> bool: + if qtype == "34": + return True + return (qc == "[语音]") and bool(qsid_raw) + + def is_quoted_image() -> bool: + if qtype == "3": + return True + return (qc == "[图片]") and bool(qsid_raw) + + def is_quoted_link() -> bool: + if qtype == "49": + return True + return bool(re.match(r"^\[链接\]\s*", qc)) + + def get_quoted_link_text() -> str: + if not qc: + return "" + return re.sub(r"^\[链接\]\s*", "", qc).strip() or qc + + quoted_voice = is_quoted_voice() + quoted_image = is_quoted_image() + quoted_link = is_quoted_link() + + quote_voice_url = "" + if include_media and ("voice" in media_kinds) and quoted_voice and qsid: + try: + arc, is_new = _materialize_voice( + zf=zf, + media_db_path=media_db_path, + server_id=int(qsid), + media_written=media_written, + ) + except Exception: + arc, is_new = "", False + if arc: + quote_voice_url = rel_path(arc) + if is_new: + with lock: + job.progress.media_copied += 1 + + quote_image_url = "" + if include_media and ("image" in media_kinds) and quoted_image and qsid and resource_conn is not None: + md5_hit = "" + try: + md5_hit = _lookup_resource_md5( + resource_conn, + resource_chat_id, + message_local_type=3, + server_id=int(qsid), + local_id=0, + create_time=0, + ) + except Exception: + md5_hit = "" + + if md5_hit: + try: + arc, is_new = _materialize_media( + zf=zf, + account_dir=account_dir, + conv_username=conv_username, + kind="image", + md5=str(md5_hit or "").strip().lower(), + file_id="", + media_written=media_written, + suggested_name="", + ) + except Exception: + arc, is_new = "", False + if arc: + quote_image_url = rel_path(arc) + if is_new: + with lock: + 
job.progress.media_copied += 1 + + qthumb_url = "" + if is_http_url(qthumb): + qthumb_local = maybe_download_remote_image(qthumb) if download_remote_media else "" + qthumb_url = qthumb_local or qthumb + + if qt or qc: + tw.write( + '
\n' + ) + tw.write('
\n') + if quoted_voice: + seconds = get_voice_duration_in_seconds(msg.get("quoteVoiceLength")) + disabled = not bool(quote_voice_url) + btn_cls = "flex items-center gap-1 min-w-0 hover:opacity-80" + if disabled: + btn_cls += " opacity-60 cursor-not-allowed" + dis_attr = " disabled" if disabled else "" + tw.write('
\n') + if qt: + tw.write(f' {esc_text(qt)}:\n') + tw.write( + f' \n") + if quote_voice_url: + tw.write( + f' \n' + ) + tw.write("
\n") + else: + tw.write('
\n') + if quoted_link: + link_text = get_quoted_link_text() + tw.write('
\n') + if qt: + tw.write(f' {esc_text(qt)}:\n') + if link_text: + ml = ' class="ml-1"' if qt else "" + tw.write(f' 🔗 {esc_text(link_text)}\n') + tw.write("
\n") + else: + hide_qc = quoted_image and qt and bool(quote_image_url) + tw.write('
\n') + if qt: + tw.write(f' {esc_text(qt)}:\n') + if qc and (not hide_qc): + ml = ' class="ml-1"' if qt else "" + tw.write(f' {esc_text(qc)}\n') + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + + if quoted_link and qthumb_url: + tw.write( + f' \n' + ) + tw.write( + f' 引用链接缩略图\n' + ) + tw.write(" \n") + + if (not quoted_link) and quoted_image and quote_image_url: + tw.write( + f' \n' + ) + tw.write( + f' 引用图片\n' + ) + tw.write(" \n") + + tw.write("
\n") + elif rt == "chatHistory": + title = str(msg.get("title") or "").strip() or "合并消息" + record_item = str(msg.get("recordItem") or "").strip() + record_item_b64 = "" + if record_item: + try: + record_item_b64 = base64.b64encode(record_item.encode("utf-8", errors="replace")).decode("ascii") + except Exception: + record_item_b64 = "" + + if record_item and include_media and (not privacy_mode): + try: + for m in _CHAT_HISTORY_MD5_TAG_RE.findall(record_item): + _ensure_chat_history_md5(m) + except Exception: + pass + if resource_conn is not None: + try: + server_map = page_media_index.get("serverMd5") + if not isinstance(server_map, dict): + server_map = {} + page_media_index["serverMd5"] = server_map + + for sid_raw in _CHAT_HISTORY_SERVER_ID_TAG_RE.findall(record_item): + sid_text = str(sid_raw or "").strip() + if not sid_text or sid_text in server_map: + continue + if (len(sid_text) > 24) or (not sid_text.isdigit()): + continue + sid = int(sid_text) + if sid <= 0: + continue + + md5_hit = "" + try: + md5_hit = _lookup_resource_md5( + resource_conn, + None, # do NOT filter by chat_id: merged-forward records come from other chats + 0, # do NOT filter by local_type + int(sid), + 0, + 0, + ) + except Exception: + md5_hit = "" + + md5_hit = str(md5_hit or "").strip().lower() + if not _is_md5(md5_hit): + continue + if _ensure_chat_history_md5(md5_hit): + server_map[sid_text] = md5_hit + except Exception: + pass + if download_remote_media: + try: + for u in _CHAT_HISTORY_URL_TAG_RE.findall(record_item): + maybe_download_remote_image(u) + except Exception: + pass + + lines = get_chat_history_preview_lines(msg) + sent_side_cls = " wechat-special-sent-side" if is_sent else "" + cls = f"wechat-chat-history-card wechat-special-card msg-radius{sent_side_cls} cursor-pointer" + tw.write( + f'
\n' + ) + tw.write('
\n') + tw.write(f'
{esc_text(title)}
\n') + if lines: + tw.write('
\n') + for line in lines: + tw.write(f'
{esc_text(line)}
\n') + tw.write("
\n") + tw.write("
\n") + tw.write('
合并消息
\n') + tw.write("
\n") + elif rt == "transfer": + received = is_transfer_received(msg) + returned = is_transfer_returned(msg) + overdue = is_transfer_overdue(msg) + side_cls = "wechat-transfer-sent-side" if is_sent else "wechat-transfer-received-side" + cls_parts = ["wechat-transfer-card", "msg-radius", side_cls] + if received: + cls_parts.append("wechat-transfer-received") + if returned: + cls_parts.append("wechat-transfer-returned") + if overdue: + cls_parts.append("wechat-transfer-overdue") + cls = " ".join(cls_parts) + if returned: + icon = "wechat-returned.png" + elif overdue: + icon = "overdue.png" + elif received: + icon = "wechat-trans-icon2.png" + else: + icon = "wechat-trans-icon1.png" + amount = format_transfer_amount(msg.get("amount")) + status = get_transfer_title(msg, is_sent=is_sent) + tw.write(f'
\n') + tw.write('
\n') + tw.write(f' \n') + tw.write('
\n') + if amount: + tw.write(f' ¥{esc_text(amount)}\n') + tw.write(f' {esc_text(status)}\n') + tw.write("
\n") + tw.write("
\n") + tw.write('
微信转账
\n') + tw.write("
\n") + elif rt == "redPacket": + received = False + cls_parts = ["wechat-redpacket-card", "wechat-special-card", "msg-radius"] + if received: + cls_parts.append("wechat-redpacket-received") + if is_sent: + cls_parts.append("wechat-special-sent-side") + icon = "wechat-trans-icon4.png" if received else "wechat-trans-icon3.png" + tw.write(f'
\n') + tw.write('
\n') + tw.write(f' \n') + tw.write('
\n') + tw.write(f' {esc_text(get_red_packet_text(msg))}\n') + if received: + tw.write(' 已领取\n') + tw.write("
\n") + tw.write("
\n") + tw.write('
微信红包
\n') + tw.write("
\n") + elif rt == "text": + tw.write(f'
{render_text_with_emojis(msg.get("content") or "")}
\n') + else: + content = str(msg.get("content") or "").strip() + if not content: + content = f"[{str(msg.get('type') or 'unknown')}] 消息" + tw.write(f'
{render_text_with_emojis(content)}
\n') + + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + + exported += 1 + with lock: + job.progress.messages_exported += 1 + job.progress.current_conversation_messages_exported = exported + if ts: + prev_ts = ts + + if scanned % 500 == 0 and job.cancel_requested: + raise _JobCancelled() + + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + tw.write("
\n") + tw.write("\n") + tw.write("\n") + + try: + media_index_payload = json.dumps(page_media_index, ensure_ascii=False) + except Exception: + media_index_payload = "{}" + media_index_payload = media_index_payload.replace("{media_index_payload}\n') + + tw.write( + '\n") + + tw.write("\n") + tw.write("\n") + tw.flush() + + zf.write(str(tmp_path), arcname) + + return exported + + def _format_message_line_txt(*, msg: dict[str, Any]) -> str: ts = int(msg.get("createTime") or 0) time_text = _format_ts(ts) @@ -1685,9 +4480,16 @@ def _privacy_scrub_message( for k in ( "title", "url", + "from", + "fromUsername", + "linkType", + "linkStyle", "thumbUrl", + "recordItem", "imageMd5", "imageFileId", + "imageMd5Candidates", + "imageFileIdCandidates", "imageUrl", "emojiMd5", "emojiUrl", @@ -1698,6 +4500,11 @@ def _privacy_scrub_message( "videoUrl", "videoThumbUrl", "voiceLength", + "quoteUsername", + "quoteServerId", + "quoteType", + "quoteThumbUrl", + "quoteVoiceLength", "quoteTitle", "quoteContent", "amount", @@ -1752,25 +4559,88 @@ def _attach_offline_media( offline: list[dict[str, Any]] = [] if rt == "image" and "image" in media_kinds: - md5 = str(msg.get("imageMd5") or "").strip().lower() - file_id = str(msg.get("imageFileId") or "").strip() - arc, is_new = _materialize_media( - zf=zf, - account_dir=account_dir, - conv_username=conv_username, - kind="image", - md5=md5 if _is_md5(md5) else "", - file_id=file_id, - media_written=media_written, - suggested_name="", - ) + primary_md5 = str(msg.get("imageMd5") or "").strip().lower() + primary_file_id = str(msg.get("imageFileId") or "").strip() + + md5_candidates_raw = msg.get("imageMd5Candidates") or [] + file_id_candidates_raw = msg.get("imageFileIdCandidates") or [] + md5_candidates = md5_candidates_raw if isinstance(md5_candidates_raw, list) else [] + file_id_candidates = file_id_candidates_raw if isinstance(file_id_candidates_raw, list) else [] + + md5s: list[str] = [] + file_ids: list[str] = [] + + def add_md5(v: Any) -> 
None: + s = str(v or "").strip().lower() + if _is_md5(s) and s not in md5s: + md5s.append(s) + + def add_file_id(v: Any) -> None: + s = str(v or "").strip() + if s and s not in file_ids: + file_ids.append(s) + + add_md5(primary_md5) + for v in md5_candidates: + add_md5(v) + + add_file_id(primary_file_id) + for v in file_id_candidates: + add_file_id(v) + + arc = "" + is_new = False + used_md5 = "" + used_file_id = "" + + # Prefer md5-based resolution first (more reliable), then fall back to file_id search. + for md5 in md5s: + arc, is_new = _materialize_media( + zf=zf, + account_dir=account_dir, + conv_username=conv_username, + kind="image", + md5=md5, + file_id="", + media_written=media_written, + suggested_name="", + ) + if arc: + used_md5 = md5 + break + + if not arc: + for file_id in file_ids: + arc, is_new = _materialize_media( + zf=zf, + account_dir=account_dir, + conv_username=conv_username, + kind="image", + md5="", + file_id=file_id, + media_written=media_written, + suggested_name="", + ) + if arc: + used_file_id = file_id + break + if arc: - offline.append({"kind": "image", "path": arc, "md5": md5, "fileId": file_id}) + # Keep primary fields in sync with what actually resolved. 
+ try: + if used_md5: + msg["imageMd5"] = used_md5 + if used_file_id: + msg["imageFileId"] = used_file_id + except Exception: + pass + + offline.append({"kind": "image", "path": arc, "md5": used_md5 or primary_md5, "fileId": used_file_id or primary_file_id}) if is_new: with lock: job.progress.media_copied += 1 else: - record_missing("image", md5 or file_id) + record_missing("image", primary_md5 or primary_file_id) if rt == "emoji" and "emoji" in media_kinds: md5 = str(msg.get("emojiMd5") or "").strip().lower() @@ -2045,20 +4915,27 @@ def _materialize_media( except Exception: return "", False + try: + with open(src, "rb") as f: + head = f.read(64) + except Exception: + head = b"" + + head_mt = _detect_image_media_type(head[:32]) + looks_like_mp4 = len(head) >= 8 and head[4:8] == b"ftyp" + ext = src.suffix.lstrip(".").lower() if not ext: - try: - head = src.read_bytes()[:32] - except Exception: - head = b"" - mt = _detect_image_media_type(head) - if mt.startswith("image/"): - ext = mt.split("/", 1)[-1] - elif len(head) >= 8 and head[4:8] == b"ftyp": + if head_mt.startswith("image/"): + ext = head_mt.split("/", 1)[-1] + elif looks_like_mp4: ext = "mp4" else: ext = "dat" + if ext == "jpeg": + ext = "jpg" + folder = "misc" if kind == "image": folder = "images" @@ -2080,10 +4957,62 @@ def _materialize_media( arc_name = arc_name[:160] arc = f"media/{folder}/{arc_name}" - try: - zf.write(src, arcname=arc) - except Exception: - return "", False + should_stream_copy = False + if kind == "file": + should_stream_copy = True + elif kind in {"image", "emoji", "video_thumb"}: + should_stream_copy = ( + (ext == "jpg" and head_mt == "image/jpeg") + or (ext == "png" and head_mt == "image/png") + or (ext == "gif" and head_mt == "image/gif") + or (ext == "webp" and head_mt == "image/webp") + ) + elif kind == "video": + should_stream_copy = ext == "mp4" and looks_like_mp4 + + if should_stream_copy or (kind not in {"image", "emoji", "video", "video_thumb"}): + try: + zf.write(src, 
arcname=arc) + except Exception: + return "", False + else: + try: + data, mt = _read_and_maybe_decrypt_media(src, account_dir=account_dir) + except Exception: + try: + zf.write(src, arcname=arc) + except Exception: + return "", False + media_written[key] = arc + return arc, True + + mt = str(mt or "").strip() + if mt == "image/png": + ext2 = "png" + elif mt == "image/jpeg": + ext2 = "jpg" + elif mt == "image/gif": + ext2 = "gif" + elif mt == "image/webp": + ext2 = "webp" + elif mt == "video/mp4": + ext2 = "mp4" + else: + ext2 = "dat" if mt == "application/octet-stream" else (ext or "dat") + + if ext2 != ext: + if nice and kind == "file": + arc_name = f"{nice}_{ident}.{ext2}" if ext2 else f"{nice}_{ident}" + else: + arc_name = f"{ident}.{ext2}" if ext2 else ident + if len(arc_name) > 160: + arc_name = arc_name[:160] + arc = f"media/{folder}/{arc_name}" + + try: + zf.writestr(arc, data) + except Exception: + return "", False media_written[key] = arc return arc, True diff --git a/src/wechat_decrypt_tool/chat_helpers.py b/src/wechat_decrypt_tool/chat_helpers.py index 8efdfc3..8c4301f 100644 --- a/src/wechat_decrypt_tool/chat_helpers.py +++ b/src/wechat_decrypt_tool/chat_helpers.py @@ -8,7 +8,7 @@ from collections import Counter from datetime import datetime from pathlib import Path from typing import Any, Optional -from urllib.parse import quote, urlparse +from urllib.parse import parse_qs, quote, urlparse from fastapi import HTTPException @@ -634,6 +634,32 @@ def _is_mp_weixin_article_url(url: str) -> bool: return "mp.weixin.qq.com/" in lu +def _is_mp_weixin_feed_article_url(url: str) -> bool: + """Detect WeChat's PC feed/recommendation mp.weixin.qq.com share URLs. + + These links often carry an `exptype` like: + masonry_feed_brief_content_elite_for_pcfeeds_u2i + + WeChat desktop tends to render them in a cover-card style (image + bottom title), + so we use this as a hint to choose the 'cover' linkStyle. 
+ """ + + u = str(url or "").strip() + if not u: + return False + + try: + parsed = urlparse(u) + q = parse_qs(parsed.query or "") + for v in (q.get("exptype") or []): + if "masonry_feed" in str(v or "").lower(): + return True + except Exception: + pass + + return "exptype=masonry_feed" in u.lower() + + def _classify_link_share(*, app_type: int, url: str, source_username: str, desc: str) -> tuple[str, str]: src = str(source_username or "").strip().lower() is_official_article = bool( @@ -647,7 +673,15 @@ def _classify_link_share(*, app_type: int, url: str, source_username: str, desc: hashtag_count = len(re.findall(r"#[^#\s]+", d)) # 公众号文章中「封面图 + 底栏标题」卡片特征:摘要以 #话题# 风格为主。 - link_style = "cover" if (is_official_article and (d.startswith("#") or hashtag_count >= 2)) else "default" + cover_like = bool( + is_official_article + and ( + d.startswith("#") + or hashtag_count >= 2 + or _is_mp_weixin_feed_article_url(url) + ) + ) + link_style = "cover" if cover_like else "default" return link_type, link_style @@ -948,8 +982,12 @@ def _parse_app_message(text: str) -> dict[str, Any]: "recordItem": record_item or "", } - if app_type in (5, 68) and url: - thumb_url = _normalize_xml_url(_extract_xml_tag_text(text, "thumburl")) + if app_type in (4, 5, 68) and url: + # Many appmsg link cards (notably Bilibili shares with 4) include a metadata block. + # DO NOT treat " dict[str, Any]: "quoteVoiceLength": quote_voice_length, } - if app_type == 62 or ".... + # Be strict here: lots of non-pat appmsg payloads still carry a nested ... metadata block. 
+ patmsg_attr = bool(re.search(r"<(sysmsg|appmsg)\b[^>]*\btype=['\"]patmsg['\"]", lower)) + if app_type == 62 or patmsg_attr: return {"renderType": "system", "content": "[拍一拍]"} if app_type == 2000 or ( diff --git a/src/wechat_decrypt_tool/routers/chat.py b/src/wechat_decrypt_tool/routers/chat.py index 278b229..c896066 100644 --- a/src/wechat_decrypt_tool/routers/chat.py +++ b/src/wechat_decrypt_tool/routers/chat.py @@ -2742,6 +2742,90 @@ def _postprocess_transfer_messages(merged: list[dict[str, Any]]) -> None: # - 将原始转账消息(1/8)回填为“已被接收” # - 若同一 transferId 同时存在原始消息与 paysubtype=3 消息,则将 paysubtype=3 的那条校正为“已收款” + def _is_transfer_expired_system_message(text: Any) -> bool: + content = str(text or "").strip() + if not content: + return False + if "转账" not in content or "过期" not in content: + return False + if "未接收" in content and ("24小时" in content or "二十四小时" in content): + return True + return "已过期" in content and ("收款方" in content or "转账" in content) + + def _mark_pending_transfers_expired_by_system_messages() -> set[str]: + expired_system_times: list[int] = [] + pending_candidates: list[tuple[int, int]] = [] # (index, createTime) + + for idx, msg in enumerate(merged): + rt = str(msg.get("renderType") or "").strip() + if rt == "system": + if _is_transfer_expired_system_message(msg.get("content")): + try: + ts = int(msg.get("createTime") or 0) + except Exception: + ts = 0 + if ts > 0: + expired_system_times.append(ts) + continue + + if rt != "transfer": + continue + + pst = str(msg.get("paySubType") or "").strip() + if pst not in ("1", "8"): + continue + + try: + ts = int(msg.get("createTime") or 0) + except Exception: + ts = 0 + if ts <= 0: + continue + + pending_candidates.append((idx, ts)) + + if not expired_system_times or not pending_candidates: + return set() + + used_pending_indexes: set[int] = set() + expired_transfer_ids: set[str] = set() + + # 过期系统提示通常出现在转账发起约 24 小时后。 + # 为避免误匹配,要求时间差落在 [22h, 26h] 范围内,并选择最接近 24h 的待收款消息。 + for sys_ts in 
sorted(expired_system_times): + best_index = -1 + best_distance = 10**9 + + for idx, transfer_ts in pending_candidates: + if idx in used_pending_indexes: + continue + delta = sys_ts - transfer_ts + if delta < 0: + continue + if delta < 22 * 3600 or delta > 26 * 3600: + continue + + distance = abs(delta - 24 * 3600) + if distance < best_distance: + best_distance = distance + best_index = idx + + if best_index < 0: + continue + + used_pending_indexes.add(best_index) + transfer_msg = merged[best_index] + transfer_msg["paySubType"] = "10" + transfer_msg["transferStatus"] = "已过期" + + tid = str(transfer_msg.get("transferId") or "").strip() + if tid: + expired_transfer_ids.add(tid) + + return expired_transfer_ids + + expired_transfer_ids = _mark_pending_transfers_expired_by_system_messages() + returned_transfer_ids: set[str] = set() # 退还状态的 transferId received_transfer_ids: set[str] = set() # 已收款状态的 transferId returned_amounts_with_time: list[tuple[str, int]] = [] # (金额, 时间戳) 用于退还回退匹配 @@ -2828,6 +2912,8 @@ def _postprocess_transfer_messages(merged: list[dict[str, Any]]) -> None: tid = str(m.get("transferId") or "").strip() if not tid or tid not in pending_transfer_ids: continue + if tid in expired_transfer_ids: + continue mid = str(m.get("id") or "").strip() if mid and mid in backfilled_message_ids: continue diff --git a/src/wechat_decrypt_tool/routers/chat_export.py b/src/wechat_decrypt_tool/routers/chat_export.py index 7a94f10..3082294 100644 --- a/src/wechat_decrypt_tool/routers/chat_export.py +++ b/src/wechat_decrypt_tool/routers/chat_export.py @@ -12,17 +12,31 @@ from ..path_fix import PathFixRoute router = APIRouter(route_class=PathFixRoute) -ExportFormat = Literal["json", "txt"] +ExportFormat = Literal["json", "txt", "html"] ExportScope = Literal["selected", "all", "groups", "singles"] MediaKind = Literal["image", "emoji", "video", "video_thumb", "voice", "file"] -MessageType = Literal["text", "image", "emoji", "video", "voice", "file", "link", "transfer", 
"redPacket", "system", "quote", "voip"] +MessageType = Literal[ + "text", + "image", + "emoji", + "video", + "voice", + "chatHistory", + "file", + "link", + "transfer", + "redPacket", + "system", + "quote", + "voip", +] class ChatExportCreateRequest(BaseModel): account: Optional[str] = Field(None, description="账号目录名(可选,默认使用第一个)") scope: ExportScope = Field("selected", description="导出范围:selected=指定会话;all=全部;groups=仅群聊;singles=仅单聊") usernames: list[str] = Field(default_factory=list, description="会话 username 列表(scope=selected 时使用)") - format: ExportFormat = Field("json", description="导出格式:json 或 txt(zip 内每个会话一个文件)") + format: ExportFormat = Field("json", description="导出格式:json/txt/html(zip 内每个会话一个文件;html 可离线打开 index.html 查看)") start_time: Optional[int] = Field(None, description="起始时间(Unix 秒,含)") end_time: Optional[int] = Field(None, description="结束时间(Unix 秒,含)") include_hidden: bool = Field(False, description="是否包含隐藏会话(scope!=selected 时)") @@ -41,6 +55,10 @@ class ChatExportCreateRequest(BaseModel): False, description="预留字段:本项目不从微信进程提取媒体密钥,请使用 wx_key 获取并保存/批量解密", ) + download_remote_media: bool = Field( + False, + description="HTML 导出时允许联网下载链接/引用缩略图等远程媒体(提高离线完整性)", + ) privacy_mode: bool = Field( False, description="隐私模式导出:隐藏会话/用户名/内容,不打包头像与媒体", @@ -64,6 +82,7 @@ async def create_chat_export(req: ChatExportCreateRequest): message_types=req.message_types, output_dir=req.output_dir, allow_process_key_extract=req.allow_process_key_extract, + download_remote_media=req.download_remote_media, privacy_mode=req.privacy_mode, file_name=req.file_name, ) diff --git a/tests/test_chat_app_message_type4_patmsg_regression.py b/tests/test_chat_app_message_type4_patmsg_regression.py new file mode 100644 index 0000000..d5e7777 --- /dev/null +++ b/tests/test_chat_app_message_type4_patmsg_regression.py @@ -0,0 +1,50 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + +from 
wechat_decrypt_tool.chat_helpers import _parse_app_message + + +class TestChatAppMessageType4PatMsgRegression(unittest.TestCase): + def test_type4_link_with_patmsg_metadata_is_not_misclassified_as_pat(self): + raw_text = ( + "" + '' + "【中配】抽象可能让你的代码变差 - CodeAesthetic" + "UP主:黑纹白斑马" + "4" + "https://b23.tv/au68guF" + "哔哩哔哩" + "3057020100044b30" + "" + "" + "" + ) + + parsed = _parse_app_message(raw_text) + self.assertEqual(parsed.get("renderType"), "link") + self.assertEqual(parsed.get("url"), "https://b23.tv/au68guF") + self.assertEqual(parsed.get("title"), "【中配】抽象可能让你的代码变差 - CodeAesthetic") + self.assertEqual(parsed.get("from"), "哔哩哔哩") + self.assertNotEqual(parsed.get("content"), "[拍一拍]") + + def test_type62_is_still_pat(self): + raw_text = '"A" 拍了拍 "B"62' + parsed = _parse_app_message(raw_text) + self.assertEqual(parsed.get("renderType"), "system") + self.assertEqual(parsed.get("content"), "[拍一拍]") + + def test_sysmsg_type_patmsg_attr_is_still_pat(self): + raw_text = 'bar' + parsed = _parse_app_message(raw_text) + self.assertEqual(parsed.get("renderType"), "system") + self.assertEqual(parsed.get("content"), "[拍一拍]") + + +if __name__ == "__main__": + unittest.main() + diff --git a/tests/test_chat_export_chat_history_modal.py b/tests/test_chat_export_chat_history_modal.py new file mode 100644 index 0000000..192e487 --- /dev/null +++ b/tests/test_chat_export_chat_history_modal.py @@ -0,0 +1,218 @@ +import os +import hashlib +import sqlite3 +import sys +import unittest +import zipfile +import importlib +from pathlib import Path +from tempfile import TemporaryDirectory + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class TestChatExportChatHistoryModal(unittest.TestCase): + _MD5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + def _reload_export_modules(self): + import wechat_decrypt_tool.app_paths as app_paths + import wechat_decrypt_tool.chat_helpers as chat_helpers + import wechat_decrypt_tool.media_helpers as media_helpers + 
import wechat_decrypt_tool.chat_export_service as chat_export_service + + importlib.reload(app_paths) + importlib.reload(chat_helpers) + importlib.reload(media_helpers) + importlib.reload(chat_export_service) + return chat_export_service + + def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", "测试好友", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def _seed_session_db(self, path: Path, *, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden INTEGER, + sort_timestamp INTEGER + ) + """ + ) + conn.execute( + "INSERT INTO SessionTable VALUES (?, ?, ?)", + (username, 0, 1735689600), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + + table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id 
INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB + ) + """ + ) + + record_item = ( + "" + "" + "" + "2" + f"{self._MD5}" + "" + "" + "" + ) + chat_history_xml = ( + "" + "19" + "聊天记录" + "记录预览" + f"" + "" + ) + + conn.execute( + f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (1, 1001, 49, 1, 2, 1735689601, chat_history_xml, None), + ) + conn.commit() + finally: + conn.close() + + def _seed_media_files(self, account_dir: Path) -> None: + resource_root = account_dir / "resource" + (resource_root / "aa").mkdir(parents=True, exist_ok=True) + (resource_root / "aa" / f"{self._MD5}.jpg").write_bytes(b"\xff\xd8\xff\xd9") + + def _prepare_account(self, root: Path, *, account: str, username: str) -> Path: + account_dir = root / "output" / "databases" / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, username=username) + self._seed_session_db(account_dir / "session.db", username=username) + self._seed_message_db(account_dir / "message_0.db", account=account, username=username) + self._seed_media_files(account_dir) + return account_dir + + def _create_job(self, manager, *, account: str, username: str): + job = manager.create_job( + account=account, + scope="selected", + usernames=[username], + export_format="html", + start_time=None, + end_time=None, + include_hidden=False, + include_official=False, + include_media=True, + media_kinds=["image"], + message_types=["chatHistory", "image"], + output_dir=None, + allow_process_key_extract=False, + download_remote_media=False, + privacy_mode=False, + file_name=None, + ) + + for _ in range(200): + latest = manager.get_job(job.export_id) + if latest and latest.status in {"done", "error", "cancelled"}: + return latest + import time as _time 
+ + _time.sleep(0.05) + self.fail("export job did not finish in time") + + def test_chat_history_modal_has_media_index_and_record_item(self): + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_test" + username = "wxid_friend" + self._prepare_account(root, account=account, username=username) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + job = self._create_job(svc.CHAT_EXPORT_MANAGER, account=account, username=username) + self.assertEqual(job.status, "done", msg=job.error) + + with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + self.assertIn(f"media/images/{self._MD5}.jpg", names) + + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + html_text = zf.read(html_path).decode("utf-8") + self.assertIn('id="chatHistoryModal"', html_text) + self.assertIn('data-wce-chat-history="1"', html_text) + self.assertIn('data-record-item-b64="', html_text) + self.assertIn('id="wceMediaIndex"', html_text) + self.assertIn(self._MD5, html_text) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data diff --git a/tests/test_chat_export_html_format.py b/tests/test_chat_export_html_format.py new file mode 100644 index 0000000..d8e5bcf --- /dev/null +++ b/tests/test_chat_export_html_format.py @@ -0,0 +1,353 @@ +import os +import json +import hashlib +import sqlite3 +import sys +import unittest +import zipfile +import importlib +from pathlib import Path +from tempfile import TemporaryDirectory + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class TestChatExportHtmlFormat(unittest.TestCase): + _FILE_MD5 = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + _VOICE_SERVER_ID = 2001 + + def _reload_export_modules(self): + import wechat_decrypt_tool.app_paths as app_paths + import 
wechat_decrypt_tool.chat_helpers as chat_helpers + import wechat_decrypt_tool.media_helpers as media_helpers + import wechat_decrypt_tool.chat_export_service as chat_export_service + + importlib.reload(app_paths) + importlib.reload(chat_helpers) + importlib.reload(media_helpers) + importlib.reload(chat_export_service) + return chat_export_service + + def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", "测试好友", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def _seed_session_db(self, path: Path, *, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden INTEGER, + sort_timestamp INTEGER + ) + """ + ) + conn.execute( + "INSERT INTO SessionTable VALUES (?, ?, ?)", + (username, 0, 1735689600), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + + table_name = 
f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB + ) + """ + ) + + image_xml = '' + voice_xml = '' + file_md5 = self._FILE_MD5 + file_xml = ( + "" + "6" + "demo.pdf" + "2048" + f"{file_md5}" + "" + ) + link_xml = ( + "" + "5" + "示例链接" + "这是描述" + "https://example.com/" + "https://example.com/thumb.jpg" + "gh_test" + "测试公众号" + "" + ) + chat_history_xml = ( + "" + "19" + "聊天记录" + "记录预览" + "张三: hi\n李四: ok" + "" + ) + transfer_xml = ( + "" + "2000" + "微信转账" + "" + "转账备注" + "¥1.23" + "3" + "transfer_123" + "" + "" + ) + red_packet_xml = ( + "" + "2001" + "红包" + "" + "恭喜发财,大吉大利" + "微信红包" + "" + "" + ) + voip_xml = ( + "" + "1" + "语音通话" + "" + ) + quote_voice_xml = ( + "" + "57" + "回复语音" + "" + "34" + f"{self._VOICE_SERVER_ID}" + "wxid_friend" + "测试好友" + "wxid_friend:3000:1:" + "" + "" + ) + rows = [ + (1, 1001, 3, 1, 2, 1735689601, image_xml, None), + (2, 1002, 1, 2, 2, 1735689602, "普通文本消息[微笑]", None), + (3, 1003, 49, 3, 1, 1735689603, transfer_xml, None), + (4, 1004, 49, 4, 2, 1735689604, red_packet_xml, None), + (5, 1005, 49, 5, 1, 1735689605, file_xml, None), + (6, 1006, 49, 6, 2, 1735689606, link_xml, None), + (7, 1007, 49, 7, 2, 1735689607, chat_history_xml, None), + (8, 1008, 50, 8, 2, 1735689608, voip_xml, None), + (9, self._VOICE_SERVER_ID, 34, 9, 1, 1735689609, voice_xml, None), + (10, 1010, 49, 10, 1, 1735689610, quote_voice_xml, None), + ] + conn.executemany( + f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + rows, + ) + conn.commit() + finally: + conn.close() + + def _seed_media_files(self, account_dir: Path) -> None: + resource_root = account_dir / "resource" + (resource_root / 
"aa").mkdir(parents=True, exist_ok=True) + (resource_root / "aa" / "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg").write_bytes(b"\xff\xd8\xff\xd9") + (resource_root / "bb").mkdir(parents=True, exist_ok=True) + (resource_root / "bb" / f"{self._FILE_MD5}.dat").write_bytes(b"dummy") + + conn = sqlite3.connect(str(account_dir / "media_0.db")) + try: + conn.execute( + """ + CREATE TABLE VoiceInfo ( + svr_id INTEGER, + create_time INTEGER, + voice_data BLOB + ) + """ + ) + conn.execute( + "INSERT INTO VoiceInfo VALUES (?, ?, ?)", + (self._VOICE_SERVER_ID, 1735689609, b"SILK_VOICE_DATA"), + ) + conn.commit() + finally: + conn.close() + + def _prepare_account(self, root: Path, *, account: str, username: str) -> Path: + account_dir = root / "output" / "databases" / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, username=username) + self._seed_session_db(account_dir / "session.db", username=username) + self._seed_message_db(account_dir / "message_0.db", account=account, username=username) + self._seed_media_files(account_dir) + return account_dir + + def _create_job(self, manager, *, account: str, username: str): + job = manager.create_job( + account=account, + scope="selected", + usernames=[username], + export_format="html", + start_time=None, + end_time=None, + include_hidden=False, + include_official=False, + include_media=True, + media_kinds=["image", "emoji", "video", "video_thumb", "voice", "file"], + message_types=[], + output_dir=None, + allow_process_key_extract=False, + download_remote_media=False, + privacy_mode=False, + file_name=None, + ) + + for _ in range(200): + latest = manager.get_job(job.export_id) + if latest and latest.status in {"done", "error", "cancelled"}: + return latest + import time as _time + + _time.sleep(0.05) + self.fail("export job did not finish in time") + + def test_html_export_contains_index_and_conversation_page(self): + with TemporaryDirectory() as td: + root = 
Path(td) + account = "wxid_test" + username = "wxid_friend" + self._prepare_account(root, account=account, username=username) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + job = self._create_job(svc.CHAT_EXPORT_MANAGER, account=account, username=username) + self.assertEqual(job.status, "done", msg=job.error) + + self.assertTrue(job.zip_path and job.zip_path.exists()) + with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + + self.assertIn("index.html", names) + self.assertIn("assets/wechat-chat-export.css", names) + self.assertIn("assets/wechat-chat-export.js", names) + + manifest = json.loads(zf.read("manifest.json").decode("utf-8")) + self.assertEqual(manifest.get("format"), "html") + + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + + html_text = zf.read(html_path).decode("utf-8") + self.assertIn('data-wce-rail-avatar="1"', html_text) + self.assertIn('data-wce-session-list="1"', html_text) + self.assertIn('id="sessionSearchInput"', html_text) + self.assertIn('data-wce-time-divider="1"', html_text) + self.assertIn('id="messageTypeFilter"', html_text) + self.assertIn('value="chatHistory"', html_text) + self.assertIn('id="chatHistoryModal"', html_text) + self.assertIn('data-wce-chat-history="1"', html_text) + self.assertIn('data-record-item-b64="', html_text) + self.assertIn('id="wceMediaIndex"', html_text) + self.assertIn('data-wce-quote-voice-btn="1"', html_text) + self.assertNotIn('title="刷新消息"', html_text) + self.assertNotIn('title="导出聊天记录"', html_text) + self.assertNotIn("搜索聊天记录", html_text) + self.assertNotIn("朋友圈", html_text) + self.assertNotIn("年度总结", html_text) + self.assertNotIn("设置", html_text) + self.assertNotIn("隐私模式", html_text) + + self.assertTrue(any(n.startswith("media/images/") for n in names)) + self.assertIn("../../media/images/", html_text) + + 
self.assertIn("wechat-transfer-card", html_text) + self.assertIn("wechat-redpacket-card", html_text) + self.assertIn("wechat-chat-history-card", html_text) + self.assertIn("wechat-voip-bubble", html_text) + self.assertIn("wechat-link-card", html_text) + self.assertIn("wechat-file-card", html_text) + self.assertIn("wechat-voice-wrapper", html_text) + + css_text = zf.read("assets/wechat-chat-export.css").decode("utf-8", errors="ignore") + self.assertIn("wechat-transfer-card", css_text) + self.assertNotIn("wechat-transfer-card[data-v-", css_text) + + js_text = zf.read("assets/wechat-chat-export.js").decode("utf-8", errors="ignore") + self.assertIn("wechat-voice-bubble", js_text) + self.assertIn("voice-playing", js_text) + self.assertIn("data-wce-quote-voice-btn", js_text) + + self.assertIn("assets/images/wechat/wechat-trans-icon1.png", names) + self.assertIn("assets/images/wechat/zip.png", names) + self.assertIn("assets/images/wechat/WeChat-Icon-Logo.wine.svg", names) + self.assertTrue(any(n.startswith("fonts/") and n.endswith(".woff2") for n in names)) + self.assertIn("wxemoji/Expression_1@2x.png", names) + self.assertIn("../../wxemoji/Expression_1@2x.png", html_text) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data diff --git a/tests/test_chat_export_image_md5_candidate_fallback.py b/tests/test_chat_export_image_md5_candidate_fallback.py new file mode 100644 index 0000000..401b716 --- /dev/null +++ b/tests/test_chat_export_image_md5_candidate_fallback.py @@ -0,0 +1,199 @@ +import os +import hashlib +import sqlite3 +import sys +import unittest +import zipfile +import importlib +from pathlib import Path +from tempfile import TemporaryDirectory + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class TestChatExportImageMd5CandidateFallback(unittest.TestCase): + def _reload_export_modules(self): + import wechat_decrypt_tool.app_paths as 
app_paths + import wechat_decrypt_tool.chat_helpers as chat_helpers + import wechat_decrypt_tool.media_helpers as media_helpers + import wechat_decrypt_tool.chat_export_service as chat_export_service + + importlib.reload(app_paths) + importlib.reload(chat_helpers) + importlib.reload(media_helpers) + importlib.reload(chat_export_service) + return chat_export_service + + def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", "测试好友", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def _seed_session_db(self, path: Path, *, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden INTEGER, + sort_timestamp INTEGER + ) + """ + ) + conn.execute( + "INSERT INTO SessionTable VALUES (?, ?, ?)", + (username, 0, 1735689600), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + + table_name = 
f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB + ) + """ + ) + + good_md5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + bad_md5 = "ffffffffffffffffffffffffffffffff" + image_xml = f'' + + conn.execute( + f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (1, 1001, 3, 1, 2, 1735689601, image_xml, None), + ) + conn.commit() + finally: + conn.close() + + def _seed_decrypted_resource(self, account_dir: Path) -> None: + resource_root = account_dir / "resource" + (resource_root / "aa").mkdir(parents=True, exist_ok=True) + (resource_root / "aa" / "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg").write_bytes(b"\xff\xd8\xff\xd9") + + def _prepare_account(self, root: Path, *, account: str, username: str) -> Path: + account_dir = root / "output" / "databases" / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, username=username) + self._seed_session_db(account_dir / "session.db", username=username) + self._seed_message_db(account_dir / "message_0.db", account=account, username=username) + self._seed_decrypted_resource(account_dir) + return account_dir + + def _create_job(self, manager, *, account: str, username: str): + job = manager.create_job( + account=account, + scope="selected", + usernames=[username], + export_format="html", + start_time=None, + end_time=None, + include_hidden=False, + include_official=False, + include_media=True, + media_kinds=["image"], + message_types=[], + output_dir=None, + allow_process_key_extract=False, + download_remote_media=False, + privacy_mode=False, + file_name=None, + ) + + for _ in range(200): + latest = 
manager.get_job(job.export_id) + if latest and latest.status in {"done", "error", "cancelled"}: + return latest + import time as _time + + _time.sleep(0.05) + self.fail("export job did not finish in time") + + def test_falls_back_to_secondary_md5_candidate(self): + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_test" + username = "wxid_friend" + self._prepare_account(root, account=account, username=username) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + job = self._create_job(svc.CHAT_EXPORT_MANAGER, account=account, username=username) + self.assertEqual(job.status, "done", msg=job.error) + + with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + self.assertIn("media/images/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg", names) + self.assertFalse(any("ffffffffffffffffffffffffffffffff" in n for n in names if n.startswith("media/images/"))) + + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + html_text = zf.read(html_path).decode("utf-8", errors="ignore") + self.assertIn("../../media/images/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg", html_text) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data + diff --git a/tests/test_chat_export_image_md5_prefers_message_resource.py b/tests/test_chat_export_image_md5_prefers_message_resource.py new file mode 100644 index 0000000..1b9d942 --- /dev/null +++ b/tests/test_chat_export_image_md5_prefers_message_resource.py @@ -0,0 +1,235 @@ +import os +import hashlib +import sqlite3 +import sys +import unittest +import zipfile +import importlib +from pathlib import Path +from tempfile import TemporaryDirectory + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class 
TestChatExportImageMd5PrefersMessageResource(unittest.TestCase): + def _reload_export_modules(self): + import wechat_decrypt_tool.app_paths as app_paths + import wechat_decrypt_tool.chat_helpers as chat_helpers + import wechat_decrypt_tool.media_helpers as media_helpers + import wechat_decrypt_tool.chat_export_service as chat_export_service + + importlib.reload(app_paths) + importlib.reload(chat_helpers) + importlib.reload(media_helpers) + importlib.reload(chat_export_service) + return chat_export_service + + def _seed_source_info(self, account_dir: Path) -> None: + wxid_dir = account_dir / "_wxid_dummy" + db_storage_dir = account_dir / "_db_storage_dummy" + wxid_dir.mkdir(parents=True, exist_ok=True) + db_storage_dir.mkdir(parents=True, exist_ok=True) + (account_dir / "_source.json").write_text( + '{"wxid_dir": "' + str(wxid_dir).replace("\\", "\\\\") + '", "db_storage_path": "' + str(db_storage_dir).replace("\\", "\\\\") + '"}', + encoding="utf-8", + ) + + def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", "测试好友", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def _seed_session_db(self, path: Path, *, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden 
INTEGER, + sort_timestamp INTEGER + ) + """ + ) + conn.execute( + "INSERT INTO SessionTable VALUES (?, ?, ?)", + (username, 0, 1735689600), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_db(self, path: Path, *, account: str, username: str, bad_md5: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + + table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB + ) + """ + ) + + image_xml = f'' + conn.execute( + f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (1, 1001, 3, 1, 2, 1735689601, image_xml, None), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_resource_db(self, path: Path, *, good_md5: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE MessageResourceInfo ( + message_id INTEGER, + message_svr_id INTEGER, + message_local_type INTEGER, + chat_id INTEGER, + message_local_id INTEGER, + message_create_time INTEGER, + packed_info BLOB + ) + """ + ) + # packed_info may contain multiple tokens; include a realistic *.dat reference so the extractor prefers it. 
+ packed_info = f"{good_md5}_t.dat".encode("ascii") + conn.execute( + "INSERT INTO MessageResourceInfo VALUES (?, ?, ?, ?, ?, ?, ?)", + (1, 1001, 3, 0, 1, 1735689601, packed_info), + ) + conn.commit() + finally: + conn.close() + + def _seed_decrypted_resource(self, account_dir: Path, *, good_md5: str) -> None: + resource_root = account_dir / "resource" + (resource_root / good_md5[:2]).mkdir(parents=True, exist_ok=True) + # Minimal JPEG payload (valid SOI/EOI). + (resource_root / good_md5[:2] / f"{good_md5}.jpg").write_bytes(b"\xff\xd8\xff\xd9") + + def _prepare_account(self, root: Path, *, account: str, username: str, bad_md5: str, good_md5: str) -> Path: + account_dir = root / "output" / "databases" / account + account_dir.mkdir(parents=True, exist_ok=True) + self._seed_source_info(account_dir) + self._seed_contact_db(account_dir / "contact.db", account=account, username=username) + self._seed_session_db(account_dir / "session.db", username=username) + self._seed_message_db(account_dir / "message_0.db", account=account, username=username, bad_md5=bad_md5) + self._seed_message_resource_db(account_dir / "message_resource.db", good_md5=good_md5) + self._seed_decrypted_resource(account_dir, good_md5=good_md5) + return account_dir + + def _create_job(self, manager, *, account: str, username: str): + job = manager.create_job( + account=account, + scope="selected", + usernames=[username], + export_format="html", + start_time=None, + end_time=None, + include_hidden=False, + include_official=False, + include_media=True, + media_kinds=["image"], + message_types=["image"], + output_dir=None, + allow_process_key_extract=False, + download_remote_media=False, + privacy_mode=False, + file_name=None, + ) + + for _ in range(200): + latest = manager.get_job(job.export_id) + if latest and latest.status in {"done", "error", "cancelled"}: + return latest + import time as _time + + _time.sleep(0.05) + self.fail("export job did not finish in time") + + def 
test_prefers_message_resource_md5_over_xml_md5(self): + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_test" + username = "wxid_friend" + bad_md5 = "ffffffffffffffffffffffffffffffff" + good_md5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + self._prepare_account(root, account=account, username=username, bad_md5=bad_md5, good_md5=good_md5) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + job = self._create_job(svc.CHAT_EXPORT_MANAGER, account=account, username=username) + self.assertEqual(job.status, "done", msg=job.error) + + with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + self.assertIn(f"media/images/{good_md5}.jpg", names) + + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + html_text = zf.read(html_path).decode("utf-8", errors="ignore") + self.assertIn(f"../../media/images/{good_md5}.jpg", html_text) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data + diff --git a/tests/test_chat_export_message_types_semantics.py b/tests/test_chat_export_message_types_semantics.py index d641bb8..7152753 100644 --- a/tests/test_chat_export_message_types_semantics.py +++ b/tests/test_chat_export_message_types_semantics.py @@ -198,6 +198,7 @@ class TestChatExportMessageTypesSemantics(unittest.TestCase): message_types=message_types, output_dir=None, allow_process_key_extract=False, + download_remote_media=False, privacy_mode=privacy_mode, file_name=None, ) diff --git a/tests/test_chat_export_remote_thumb_option.py b/tests/test_chat_export_remote_thumb_option.py new file mode 100644 index 0000000..e587fef --- /dev/null +++ b/tests/test_chat_export_remote_thumb_option.py @@ -0,0 +1,304 @@ +import os +import hashlib +import sqlite3 +import sys +import unittest +import zipfile +import importlib +from 
pathlib import Path +from tempfile import TemporaryDirectory +from unittest import mock + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class _FakeResponse: + def __init__(self, body: bytes, *, content_type: str) -> None: + self.status_code = 200 + self.headers = { + "Content-Type": str(content_type or "").strip(), + "Content-Length": str(len(body)), + } + self._body = body + + def iter_content(self, chunk_size=65536): + data = self._body or b"" + for i in range(0, len(data), int(chunk_size or 65536)): + yield data[i : i + int(chunk_size or 65536)] + + def close(self): + return None + + +class TestChatExportRemoteThumbOption(unittest.TestCase): + def _reload_export_modules(self): + import wechat_decrypt_tool.app_paths as app_paths + import wechat_decrypt_tool.chat_helpers as chat_helpers + import wechat_decrypt_tool.media_helpers as media_helpers + import wechat_decrypt_tool.chat_export_service as chat_export_service + + importlib.reload(app_paths) + importlib.reload(chat_helpers) + importlib.reload(media_helpers) + importlib.reload(chat_export_service) + return chat_export_service + + def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", "测试好友", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def 
_seed_session_db(self, path: Path, *, username: str) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden INTEGER, + sort_timestamp INTEGER + ) + """ + ) + conn.execute( + "INSERT INTO SessionTable VALUES (?, ?, ?)", + (username, 0, 1735689600), + ) + conn.commit() + finally: + conn.close() + + def _seed_message_db(self, path: Path, *, account: str, username: str) -> tuple[str, str]: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + + table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB + ) + """ + ) + + link_thumb = "https://1.1.1.1/thumb.png" + quote_thumb = "https://1.1.1.1/quote.png" + + link_xml = ( + "" + "5" + "示例链接" + "这是描述" + "https://example.com/" + f"{link_thumb}" + "" + ) + quote_xml = ( + "" + "57" + "回复" + "" + "49" + "8888" + "wxid_other" + "对方" + "" + "5被引用链接https://example.com/" + f"{quote_thumb}" + "" + "" + "" + "" + ) + + rows = [ + (1, 1001, 49, 1, 2, 1735689601, link_xml, None), + (2, 1002, 49, 2, 2, 1735689602, quote_xml, None), + ] + conn.executemany( + f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + rows, + ) + conn.commit() + return link_thumb, quote_thumb + finally: + conn.close() + + def _prepare_account(self, root: Path, *, account: str, username: str) -> tuple[Path, str, str]: + account_dir = root / "output" / "databases" / account + 
account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, username=username) + self._seed_session_db(account_dir / "session.db", username=username) + link_thumb, quote_thumb = self._seed_message_db(account_dir / "message_0.db", account=account, username=username) + return account_dir, link_thumb, quote_thumb + + def _create_job(self, manager, *, account: str, username: str, download_remote_media: bool): + job = manager.create_job( + account=account, + scope="selected", + usernames=[username], + export_format="html", + start_time=None, + end_time=None, + include_hidden=False, + include_official=False, + include_media=True, + media_kinds=["image", "emoji", "video", "video_thumb", "voice", "file"], + message_types=["link", "quote", "image"], + output_dir=None, + allow_process_key_extract=False, + download_remote_media=download_remote_media, + privacy_mode=False, + file_name=None, + ) + + for _ in range(200): + latest = manager.get_job(job.export_id) + if latest and latest.status in {"done", "error", "cancelled"}: + return latest + import time as _time + + _time.sleep(0.05) + self.fail("export job did not finish in time") + + def test_remote_thumb_disabled_does_not_download(self): + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_test" + username = "wxid_friend" + _, link_thumb, quote_thumb = self._prepare_account(root, account=account, username=username) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + + with mock.patch.object( + svc.requests, + "get", + side_effect=AssertionError("requests.get should not be called when download_remote_media=False"), + ) as m_get: + job = self._create_job( + svc.CHAT_EXPORT_MANAGER, + account=account, + username=username, + download_remote_media=False, + ) + self.assertEqual(job.status, "done", msg=job.error) + self.assertEqual(m_get.call_count, 0) + + 
with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + html_text = zf.read(html_path).decode("utf-8") + self.assertIn(f'src="{link_thumb}"', html_text) + self.assertIn(f'src="{quote_thumb}"', html_text) + self.assertFalse(any(n.startswith("media/remote/") for n in names)) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data + + def test_remote_thumb_enabled_downloads_and_rewrites(self): + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_test" + username = "wxid_friend" + _, link_thumb, quote_thumb = self._prepare_account(root, account=account, username=username) + + prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR") + try: + os.environ["WECHAT_TOOL_DATA_DIR"] = str(root) + svc = self._reload_export_modules() + + fake_png = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde" + + def _fake_get(url, **_kwargs): + return _FakeResponse(fake_png, content_type="image/png") + + with mock.patch.object(svc.requests, "get", side_effect=_fake_get) as m_get: + job = self._create_job( + svc.CHAT_EXPORT_MANAGER, + account=account, + username=username, + download_remote_media=True, + ) + self.assertEqual(job.status, "done", msg=job.error) + self.assertGreaterEqual(m_get.call_count, 1) + + with zipfile.ZipFile(job.zip_path, "r") as zf: + names = set(zf.namelist()) + html_path = next((n for n in names if n.endswith("/messages.html")), "") + self.assertTrue(html_path) + html_text = zf.read(html_path).decode("utf-8") + + h1 = hashlib.sha256(link_thumb.encode("utf-8", errors="ignore")).hexdigest() + arc1 = f"media/remote/{h1[:32]}.png" + self.assertIn(arc1, names) + self.assertIn(f"../../{arc1}", html_text) + self.assertNotIn(f'src="{link_thumb}"', html_text) + + h2 = 
hashlib.sha256(quote_thumb.encode("utf-8", errors="ignore")).hexdigest() + arc2 = f"media/remote/{h2[:32]}.png" + self.assertIn(arc2, names) + self.assertIn(f"../../{arc2}", html_text) + self.assertNotIn(f'src="{quote_thumb}"', html_text) + finally: + if prev_data is None: + os.environ.pop("WECHAT_TOOL_DATA_DIR", None) + else: + os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data + diff --git a/tests/test_chat_official_article_cover_style.py b/tests/test_chat_official_article_cover_style.py new file mode 100644 index 0000000..40f303c --- /dev/null +++ b/tests/test_chat_official_article_cover_style.py @@ -0,0 +1,58 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + +from wechat_decrypt_tool.chat_helpers import _parse_app_message + + +class TestChatOfficialArticleCoverStyle(unittest.TestCase): + def test_mp_weixin_feed_url_is_cover_style(self): + raw_text = ( + "" + "" + "时尚穿搭:「这样的jk你喜欢吗」" + "这样的jk你喜欢吗?" + "5" + "" + "http://mp.weixin.qq.com/s?__biz=MzkxOTY4MjIxOA==&mid=2247508015&idx=1&sn=931dce677c6e70b4365792b14e7e8ff0" + "&exptype=masonry_feed_brief_content_elite_for_pcfeeds_u2i&ranksessionid=1770868256_1&req_id=1770867949535989#rd" + "" + "https://mmbiz.qpic.cn/sz_mmbiz_jpg/foo/640?wx_fmt=jpeg&wxfrom=401" + "甜图社" + "gh_abc123" + "" + "" + ) + + parsed = _parse_app_message(raw_text) + self.assertEqual(parsed.get("renderType"), "link") + self.assertEqual(parsed.get("linkType"), "official_article") + self.assertEqual(parsed.get("linkStyle"), "cover") + + def test_mp_weixin_non_feed_url_keeps_default_style(self): + raw_text = ( + "" + "" + "普通分享" + "这样的jk你喜欢吗?" 
+ "5" + "http://mp.weixin.qq.com/s?__biz=foo&mid=1&idx=1&sn=bar#rd" + "甜图社" + "gh_abc123" + "" + "" + ) + + parsed = _parse_app_message(raw_text) + self.assertEqual(parsed.get("renderType"), "link") + self.assertEqual(parsed.get("linkType"), "official_article") + self.assertEqual(parsed.get("linkStyle"), "default") + + +if __name__ == "__main__": + unittest.main() + diff --git a/tests/test_transfer_postprocess.py b/tests/test_transfer_postprocess.py index 965ceea..3f6fea4 100644 --- a/tests/test_transfer_postprocess.py +++ b/tests/test_transfer_postprocess.py @@ -62,7 +62,68 @@ class TestTransferPostprocess(unittest.TestCase): self.assertEqual(merged[0].get("transferStatus"), "已被接收") + def test_pending_transfer_marked_expired_by_system_message(self): + merged = [ + { + "id": "message_0:Msg_x:100", + "renderType": "transfer", + "paySubType": "1", + "transferId": "t-expired-1", + "amount": "¥500.00", + "createTime": 1770742598, + "isSent": True, + "transferStatus": "转账", + }, + { + "id": "message_0:Msg_x:101", + "renderType": "system", + "type": 10000, + "createTime": 1770829000, + "content": "收款方24小时内未接收你的转账,已过期", + }, + ] + + chat_router._postprocess_transfer_messages(merged) + + self.assertEqual(merged[0].get("paySubType"), "10") + self.assertEqual(merged[0].get("transferStatus"), "已过期") + + def test_expired_matching_wins_over_amount_time_received_fallback(self): + merged = [ + { + "id": "message_0:Msg_x:200", + "renderType": "transfer", + "paySubType": "1", + "transferId": "t-expired-2", + "amount": "¥500.00", + "createTime": 1770742598, + "isSent": True, + "transferStatus": "", + }, + { + "id": "message_0:Msg_x:201", + "renderType": "transfer", + "paySubType": "3", + "transferId": "t-other", + "amount": "¥500.00", + "createTime": 1770828800, + "isSent": False, + "transferStatus": "已收款", + }, + { + "id": "message_0:Msg_x:202", + "renderType": "system", + "type": 10000, + "createTime": 1770829000, + "content": "收款方24小时内未接收你的转账,已过期", + }, + ] + + 
chat_router._postprocess_transfer_messages(merged) + + self.assertEqual(merged[0].get("paySubType"), "10") + self.assertEqual(merged[0].get("transferStatus"), "已过期") + if __name__ == "__main__": unittest.main() - From 7a0c39e39d9e6051e81c143df7b7b5e5056c930c Mon Sep 17 00:00:00 2001 From: 2977094657 <2977094657@qq.com> Date: Fri, 13 Feb 2026 22:40:39 +0800 Subject: [PATCH 2/3] =?UTF-8?q?feat(wrapped):=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E6=A2=97=E5=9B=BE=E5=B9=B4=E9=89=B4=EF=BC=88Emoji=20Universe?= =?UTF-8?q?=EF=BC=89=E5=8D=A1=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 后端新增 card_04_emoji_universe:统计表情包/emoji 使用与画像 - 前端新增 Card04EmojiUniverse + VueBits Stack/ImageTrail 交互展示 - 更新 Wrapped manifest/Hero 预览与用例覆盖 --- .../wrapped/cards/Card04EmojiUniverse.vue | 793 +++++++++++ .../wrapped/shared/VueBitsImageTrail.vue | 1225 ++++++++++++++++ .../wrapped/shared/VueBitsStack.vue | 294 ++++ .../components/wrapped/shared/WrappedHero.vue | 4 + frontend/pages/wrapped/index.vue | 6 + .../wrapped/cards/card_04_emoji_universe.py | 1265 +++++++++++++++++ src/wechat_decrypt_tool/wrapped/service.py | 18 +- tests/test_wrapped_emoji_universe.py | 773 ++++++++++ 8 files changed, 4375 insertions(+), 3 deletions(-) create mode 100644 frontend/components/wrapped/cards/Card04EmojiUniverse.vue create mode 100644 frontend/components/wrapped/shared/VueBitsImageTrail.vue create mode 100644 frontend/components/wrapped/shared/VueBitsStack.vue create mode 100644 src/wechat_decrypt_tool/wrapped/cards/card_04_emoji_universe.py create mode 100644 tests/test_wrapped_emoji_universe.py diff --git a/frontend/components/wrapped/cards/Card04EmojiUniverse.vue b/frontend/components/wrapped/cards/Card04EmojiUniverse.vue new file mode 100644 index 0000000..a5a6596 --- /dev/null +++ b/frontend/components/wrapped/cards/Card04EmojiUniverse.vue @@ -0,0 +1,793 @@ + + + + + diff --git a/frontend/components/wrapped/shared/VueBitsImageTrail.vue 
b/frontend/components/wrapped/shared/VueBitsImageTrail.vue new file mode 100644 index 0000000..612b5cc --- /dev/null +++ b/frontend/components/wrapped/shared/VueBitsImageTrail.vue @@ -0,0 +1,1225 @@ + + + + + + diff --git a/frontend/components/wrapped/shared/VueBitsStack.vue b/frontend/components/wrapped/shared/VueBitsStack.vue new file mode 100644 index 0000000..c191c7e --- /dev/null +++ b/frontend/components/wrapped/shared/VueBitsStack.vue @@ -0,0 +1,294 @@ + + + + + diff --git a/frontend/components/wrapped/shared/WrappedHero.vue b/frontend/components/wrapped/shared/WrappedHero.vue index 13f5d5b..66c7ed4 100644 --- a/frontend/components/wrapped/shared/WrappedHero.vue +++ b/frontend/components/wrapped/shared/WrappedHero.vue @@ -329,6 +329,10 @@ const PREVIEW_BY_KIND = { 'chat/reply_speed': { summary: '回复速度', question: '谁是你愿意秒回的那个人?' + }, + 'emoji/annual_universe': { + summary: '梗图年鉴', + question: '你这一年最常丢出的表情包是哪张?' } } diff --git a/frontend/pages/wrapped/index.vue b/frontend/pages/wrapped/index.vue index 17cedad..7429958 100644 --- a/frontend/pages/wrapped/index.vue +++ b/frontend/pages/wrapped/index.vue @@ -163,6 +163,12 @@ variant="slide" class="h-full w-full" /> + [^"]+)"\s*:\s*"(?P[^"]+)"\s*,?\s*$') +_MD5_HEX_RE = re.compile(r"(?i)[0-9a-f]{32}") +_EXPRESSION_ASSET_RE = re.compile(r"^Expression_(\d+)@2x\.png$") +_EMOJI_VS16 = "\ufe0f" +_EMOJI_ZWJ = "\u200d" +_EMOJI_KEYCAP = "\u20e3" + + +def _is_regional_indicator(ch: str) -> bool: + if not ch: + return False + cp = ord(ch) + return 0x1F1E6 <= cp <= 0x1F1FF + + +def _is_emoji_modifier(ch: str) -> bool: + if not ch: + return False + cp = ord(ch) + return 0x1F3FB <= cp <= 0x1F3FF + + +def _is_emoji_base(ch: str) -> bool: + if not ch: + return False + cp = ord(ch) + return ( + (0x1F300 <= cp <= 0x1FAFF) + or (0x2600 <= cp <= 0x26FF) + or (0x2700 <= cp <= 0x27BF) + or (0x1F1E6 <= cp <= 0x1F1FF) + or cp in {0x00A9, 0x00AE, 0x203C, 0x2049, 0x2122, 0x2139, 0x3030, 0x303D, 0x3297, 0x3299} + or cp == 0x1F004 + or 
(0x1F170 <= cp <= 0x1F251) + ) + + +def _extract_unicode_emoji_tokens(text: str) -> list[str]: + s = str(text or "") + if not s: + return [] + + out: list[str] = [] + i = 0 + n = len(s) + while i < n: + ch = s[i] + + # keycap emoji: [0-9#*][VS16]?U+20E3 + if ch in "0123456789#*": + j = i + 1 + if j < n and s[j] == _EMOJI_VS16: + j += 1 + if j < n and s[j] == _EMOJI_KEYCAP: + out.append(s[i : j + 1]) + i = j + 1 + continue + + # flags + if _is_regional_indicator(ch): + if (i + 1) < n and _is_regional_indicator(s[i + 1]): + out.append(s[i : i + 2]) + i += 2 + else: + out.append(ch) + i += 1 + continue + + if not _is_emoji_base(ch): + i += 1 + continue + + token: list[str] = [ch] + j = i + 1 + if j < n and s[j] == _EMOJI_VS16: + token.append(s[j]) + j += 1 + if j < n and _is_emoji_modifier(s[j]): + token.append(s[j]) + j += 1 + + # Handle ZWJ chains. + while (j + 1) < n and s[j] == _EMOJI_ZWJ and _is_emoji_base(s[j + 1]): + token.append(s[j]) + token.append(s[j + 1]) + j += 2 + if j < n and s[j] == _EMOJI_VS16: + token.append(s[j]) + j += 1 + if j < n and _is_emoji_modifier(s[j]): + token.append(s[j]) + j += 1 + + out.append("".join(token)) + i = j + + return out + + +def _emoji_key_priority(key: str) -> tuple[int, int, str]: + s = str(key or "").strip() + if not s: + return (9, 9, "") + if re.fullmatch(r"\[[\u4e00-\u9fff]+\]", s): + return (0, len(s), s) + if re.fullmatch(r"/[\u4e00-\u9fff]+", s): + return (1, len(s), s) + if re.fullmatch(r"\[[A-Za-z][A-Za-z0-9_ ]*\]", s): + return (2, len(s), s) + if re.fullmatch(r"/:[^/\s]+", s): + return (3, len(s), s) + return (4, len(s), s) + + +def _normalize_index_text_for_emoji_match(text: str) -> str: + """ + Our chat search index stores `message_fts.text` as `_to_char_token_text`, i.e.: + - lowercased + - whitespace removed + - every character joined by single spaces + + Example: "[捂脸]" -> "[ 捂 脸 ]" + For emoji matching, we normalize it back by removing whitespace and lowercasing. 
+ """ + + return "".join(ch for ch in str(text or "").lower() if not ch.isspace()) + + +def _iter_protobuf_varints(blob: bytes) -> list[tuple[int, int]]: + out: list[tuple[int, int]] = [] + data = bytes(blob or b"") + if not data: + return out + + i = 0 + n = len(data) + while i < n: + key = int(data[i]) + i += 1 + field = int(key >> 3) + wire_type = int(key & 0x07) + + if wire_type == 0: # varint + shift = 0 + value = 0 + while i < n: + b = int(data[i]) + i += 1 + value |= (b & 0x7F) << shift + if b < 0x80: + break + shift += 7 + out.append((field, int(value))) + continue + + if wire_type == 1: # 64-bit + i += 8 + continue + + if wire_type == 2: # length-delimited + shift = 0 + ln = 0 + while i < n: + b = int(data[i]) + i += 1 + ln |= (b & 0x7F) << shift + if b < 0x80: + break + shift += 7 + i += int(ln) + continue + + if wire_type == 5: # 32-bit + i += 4 + continue + + break + + return out + + +def _extract_packed_emoji_meta(packed_info_data: Any) -> tuple[Optional[int], Optional[int]]: + data: bytes = b"" + if packed_info_data is None: + return None, None + if isinstance(packed_info_data, memoryview): + data = packed_info_data.tobytes() + elif isinstance(packed_info_data, (bytes, bytearray)): + data = bytes(packed_info_data) + elif isinstance(packed_info_data, str): + s = packed_info_data.strip() + if s: + try: + data = bytes.fromhex(s) if (len(s) % 2 == 0 and re.fullmatch(r"(?i)[0-9a-f]+", s)) else s.encode( + "utf-8", + errors="ignore", + ) + except Exception: + data = b"" + if not data: + return None, None + + field1: Optional[int] = None + field2: Optional[int] = None + for f, v in _iter_protobuf_varints(data): + if f == 1 and field1 is None: + field1 = int(v) + elif f == 2 and field2 is None: + field2 = int(v) + if field1 is not None and field2 is not None: + break + return field1, field2 + + +def _year_range_epoch_seconds(year: int) -> tuple[int, int]: + start = int(datetime(year, 1, 1).timestamp()) + end = int(datetime(year + 1, 1, 1).timestamp()) + 
return start, end + + +def _mask_name(name: str) -> str: + s = str(name or "").strip() + if not s: + return "" + if len(s) == 1: + return "*" + if len(s) == 2: + return s[0] + "*" + return s[0] + ("*" * (len(s) - 2)) + s[-1] + + +def _weekday_name_zh(weekday_index: int) -> str: + labels = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"] + if 0 <= weekday_index < len(labels): + return labels[weekday_index] + return "" + + +def _list_message_tables(conn: sqlite3.Connection) -> list[str]: + try: + rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall() + except Exception: + return [] + out: list[str] = [] + for r in rows: + if not r or not r[0]: + continue + raw_name = r[0] + if isinstance(raw_name, memoryview): + raw_name = raw_name.tobytes() + if isinstance(raw_name, (bytes, bytearray)): + try: + name = bytes(raw_name).decode("utf-8", errors="ignore") + except Exception: + continue + else: + name = str(raw_name) + ln = name.lower() + if ln.startswith(("msg_", "chat_")): + out.append(name) + return out + + +def _list_session_usernames(session_db_path: Path) -> list[str]: + if not session_db_path.exists(): + return [] + conn = sqlite3.connect(str(session_db_path)) + try: + try: + rows = conn.execute("SELECT username FROM SessionTable").fetchall() + except sqlite3.OperationalError: + rows = conn.execute("SELECT username FROM Session").fetchall() + except Exception: + rows = [] + finally: + try: + conn.close() + except Exception: + pass + + out: list[str] = [] + for r in rows: + if not r or not r[0]: + continue + u = str(r[0]).strip() + if u: + out.append(u) + return out + + +@functools.lru_cache(maxsize=1) +def _load_wechat_emoji_table() -> dict[str, str]: + repo_root = Path(__file__).resolve().parents[4] + path = repo_root / "frontend" / "utils" / "wechat-emojis.ts" + try: + text = path.read_text(encoding="utf-8") + except Exception: + return {} + + table: dict[str, str] = {} + for line in text.splitlines(): + stripped = line.strip() + if (not 
stripped) or stripped.startswith("//"): + continue + m = _TS_WECHAT_EMOJI_ENTRY_RE.match(line) + if not m: + continue + key = str(m.group("key") or "") + value = str(m.group("value") or "") + if key and value: + table[key] = value + return table + + +@functools.lru_cache(maxsize=1) +def _load_wechat_emoji_regex() -> Optional[re.Pattern[str]]: + table = _load_wechat_emoji_table() + if not table: + return None + keys = sorted(table.keys(), key=len, reverse=True) + escaped = [re.escape(k) for k in keys if k] + if not escaped: + return None + try: + return re.compile(f"({'|'.join(escaped)})") + except Exception: + return None + + +@functools.lru_cache(maxsize=1) +def _load_wechat_text_emoji_matcher() -> tuple[Optional[re.Pattern[str]], dict[str, str]]: + """ + Build a matcher for extracting WeChat "small yellow face" codes from `message_fts.text`. + + Note: `message_fts.text` is stored as a char-tokenized string (see `_normalize_index_text_for_emoji_match`), + so we match against normalized keys (lowercased + whitespace removed). 
+ + Returns: + - regex: matches normalized keys + - norm_key -> canonical key (used as the public label) + """ + + table = _load_wechat_emoji_table() + if not table: + return None, {} + + asset_to_keys: dict[str, list[str]] = {} + for key, value in table.items(): + asset = str(value or "").strip() + if not asset: + continue + asset_to_keys.setdefault(asset, []).append(str(key or "")) + + asset_to_label: dict[str, str] = {} + for asset, keys in asset_to_keys.items(): + keys2 = [k for k in keys if k] + if not keys2: + continue + asset_to_label[asset] = sorted(keys2, key=_emoji_key_priority)[0] + + norm_to_label: dict[str, str] = {} + for key, value in table.items(): + asset = str(value or "").strip() + label = asset_to_label.get(asset) + if not label: + continue + nk = _normalize_index_text_for_emoji_match(str(key or "")) + if not nk: + continue + norm_to_label.setdefault(nk, label) + + keys_norm = sorted(norm_to_label.keys(), key=len, reverse=True) + escaped = [re.escape(k) for k in keys_norm if k] + if not escaped: + return None, norm_to_label + try: + return re.compile(f"({'|'.join(escaped)})"), norm_to_label + except Exception: + return None, norm_to_label + + +@functools.lru_cache(maxsize=1) +def _load_wechat_expression_catalog() -> tuple[dict[int, str], dict[int, str]]: + table = _load_wechat_emoji_table() + if not table: + return {}, {} + + id_to_asset: dict[int, str] = {} + asset_to_keys: dict[str, list[str]] = {} + for key, value in table.items(): + asset = str(value or "").strip() + m = _EXPRESSION_ASSET_RE.fullmatch(asset) + if not m: + continue + try: + expr_id = int(m.group(1)) + except Exception: + continue + if expr_id <= 0: + continue + id_to_asset.setdefault(expr_id, asset) + asset_to_keys.setdefault(asset, []).append(str(key or "")) + + id_to_label: dict[int, str] = {} + for expr_id, asset in id_to_asset.items(): + keys = [k for k in asset_to_keys.get(asset, []) if k] + if not keys: + continue + keys_sorted = sorted(keys, key=_emoji_key_priority) + 
id_to_label[expr_id] = keys_sorted[0] + + return id_to_asset, id_to_label + + +def _pick_persona( + *, + sent_sticker_count: int, + sticker_share: float, + peak_hour: Optional[int], + top_text_emoji_count: int, +) -> dict[str, str]: + if sent_sticker_count <= 0 and top_text_emoji_count <= 0: + return {"code": "quiet_observer", "label": "静默观察员", "reason": "你今年几乎没靠表情表达。"} + + if peak_hour is not None and 0 <= int(peak_hour) <= 4 and sent_sticker_count >= 50: + return {"code": "midnight_sticker_king", "label": "午夜斗图王", "reason": "高峰活跃在深夜,夜聊斗图火力很足。"} + + if top_text_emoji_count >= 20 and top_text_emoji_count >= int(sent_sticker_count * 0.6): + return {"code": "text_emoji_narrator", "label": "小黄脸叙事家", "reason": "你更常把小黄脸嵌进文字,表达更细腻。"} + + if sticker_share >= 0.45 and sent_sticker_count >= 80: + return {"code": "sticker_machine_gun", "label": "表情包机关枪", "reason": "在你的表达里,表情包占比非常高。"} + + return {"code": "steady_fighter", "label": "稳健斗图手", "reason": "斗图稳定输出,节奏和分寸都在线。"} + + +def _build_local_emoji_url( + *, + account_name: str, + md5: str, + username: str, + emoji_remote_url: str, +) -> str: + base = f"/api/chat/media/emoji?account={quote(account_name)}&md5={quote(md5)}" + if username: + base += f"&username={quote(username)}" + if emoji_remote_url: + base += f"&emoji_url={quote(emoji_remote_url, safe='')}" + return base + + +def compute_emoji_universe_stats(*, account_dir: Path, year: int) -> dict[str, Any]: + start_ts, end_ts = _year_range_epoch_seconds(year) + my_username = str(account_dir.name or "").strip() + + sent_sticker_count = 0 + total_sent_messages = 0 + sticker_active_days: set[str] = set() + hour_counts: Counter[int] = Counter() + weekday_counts: Counter[int] = Counter() + sticker_by_username: Counter[str] = Counter() + text_emoji_counts: Counter[str] = Counter() + unicode_emoji_counts: Counter[str] = Counter() + wechat_emoji_counts: Counter[int] = Counter() + + sticker_key_counts: Counter[str] = Counter() + sticker_key_md5: dict[str, str] = {} + 
sticker_key_expr_id: dict[str, int] = {} + sticker_url_map: dict[str, str] = {} + sticker_sample_username: dict[str, str] = {} + sticker_key_username_counts: dict[str, Counter[str]] = defaultdict(Counter) + sticker_key_first_ts_in_year: dict[str, int] = {} + + used_index = False + + emoji_table = _load_wechat_emoji_table() + emoji_regex, emoji_norm_to_key = _load_wechat_text_emoji_matcher() + expression_id_to_asset, expression_id_to_label = _load_wechat_expression_catalog() + + index_path = get_chat_search_index_db_path(account_dir) + if index_path.exists(): + conn = sqlite3.connect(str(index_path)) + try: + has_fts = ( + conn.execute( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='message_fts' LIMIT 1" + ).fetchone() + is not None + ) + if has_fts and my_username: + used_index = True + ts_expr = ( + "CASE " + "WHEN CAST(create_time AS INTEGER) > 1000000000000 " + "THEN CAST(CAST(create_time AS INTEGER)/1000 AS INTEGER) " + "ELSE CAST(create_time AS INTEGER) " + "END" + ) + where_base = ( + f"{ts_expr} >= ? AND {ts_expr} < ? " + "AND db_stem NOT LIKE 'biz_message%' " + "AND sender_username = ?" 
+ ) + + try: + r_total = conn.execute( + f"SELECT COUNT(1) FROM message_fts WHERE {where_base} AND CAST(local_type AS INTEGER) != 10000", + (start_ts, end_ts, my_username), + ).fetchone() + total_sent_messages = int((r_total[0] if r_total else 0) or 0) + except Exception: + total_sent_messages = 0 + + try: + r_sticker = conn.execute( + f"SELECT COUNT(1) FROM message_fts WHERE {where_base} AND CAST(local_type AS INTEGER) = 47", + (start_ts, end_ts, my_username), + ).fetchone() + sent_sticker_count = int((r_sticker[0] if r_sticker else 0) or 0) + except Exception: + sent_sticker_count = 0 + + try: + rows_u = conn.execute( + f"SELECT username, COUNT(1) AS cnt " + f"FROM message_fts WHERE {where_base} AND CAST(local_type AS INTEGER) = 47 " + "GROUP BY username", + (start_ts, end_ts, my_username), + ).fetchall() + except Exception: + rows_u = [] + for r in rows_u: + if not r: + continue + username = str(r[0] or "").strip() + if not username: + continue + try: + cnt = int(r[1] or 0) + except Exception: + cnt = 0 + if cnt > 0: + sticker_by_username[username] += cnt + + try: + rows_t = conn.execute( + "SELECT " + "date(datetime(ts, 'unixepoch', 'localtime')) AS d, " + "CAST(strftime('%H', datetime(ts, 'unixepoch', 'localtime')) AS INTEGER) AS h, " + "CAST(strftime('%w', datetime(ts, 'unixepoch', 'localtime')) AS INTEGER) AS w " + "FROM (" + f" SELECT {ts_expr} AS ts " + " FROM message_fts " + f" WHERE {where_base} AND CAST(local_type AS INTEGER) = 47" + ") sub", + (start_ts, end_ts, my_username), + ).fetchall() + except Exception: + rows_t = [] + for r in rows_t: + if not r: + continue + d = str(r[0] or "").strip() + try: + h = int(r[1] if r[1] is not None else -1) + except Exception: + h = -1 + try: + w0 = int(r[2] if r[2] is not None else -1) + except Exception: + w0 = -1 + if d: + sticker_active_days.add(d) + if 0 <= h <= 23: + hour_counts[h] += 1 + if 0 <= w0 <= 6: + # sqlite: 0=Sun..6=Sat -> 0=Mon..6=Sun + w = 6 if w0 == 0 else (w0 - 1) + weekday_counts[w] += 1 + + 
def _has_packed_info_data_column(conn: sqlite3.Connection, quoted_table: str) -> bool:
    """Return True when the given table has a ``packed_info_data`` column.

    ``quoted_table`` is interpolated verbatim into ``PRAGMA table_info(...)``,
    so it must already be a safely quoted identifier. Any failure while
    running the pragma is reported as "column absent" (``False``).
    """

    def _as_column_name(raw: object) -> str:
        # Column names can arrive as bytes-like values (the message DB
        # connections in this module set ``text_factory = bytes``), so
        # normalise everything to a lower-cased ``str`` before comparing.
        if isinstance(raw, memoryview):
            raw = raw.tobytes()
        if isinstance(raw, (bytes, bytearray)):
            try:
                raw = bytes(raw).decode("utf-8", errors="ignore")
            except Exception:
                raw = ""
        return str(raw or "").strip().lower()

    try:
        table_info = conn.execute(f"PRAGMA table_info({quoted_table})").fetchall()
    except Exception:
        return False

    # Each pragma row carries the column name at index 1; skip short/empty rows.
    return any(
        _as_column_name(entry[1]) == "packed_info_data"
        for entry in table_info
        if entry and len(entry) >= 2
    )
"").strip().lower() + sticker_key = "" + if emoji_md5: + sticker_key = f"md5:{emoji_md5}" + if record_maps: + sticker_key_md5[sticker_key] = emoji_md5 + elif packed_emoji_id is not None and int(packed_emoji_id) > 0: + expr_id = int(packed_emoji_id) + sticker_key = f"expr:{expr_id}" + if record_maps: + sticker_key_expr_id[sticker_key] = expr_id + if count_wechat_builtin and expr_id in expression_id_to_asset: + wechat_emoji_counts[expr_id] += 1 + + return ts, sticker_key, emoji_url + + db_paths = [p for p in _iter_message_db_paths(account_dir) if not p.name.lower().startswith("biz_message")] + for db_path in db_paths: + if not db_path.exists(): + continue + + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + conn.text_factory = bytes + + my_rowid: Optional[int] = None + try: + r2 = conn.execute( + "SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1", + (my_username,), + ).fetchone() + if r2 is not None and r2[0] is not None: + my_rowid = int(r2[0]) + except Exception: + my_rowid = None + if my_rowid is None: + continue + + tables = _list_message_tables(conn) + for table_name in tables: + username = resolve_username_from_table(table_name) + qt = _quote_ident(table_name) + + # Fallback-only counters when search index is unavailable. + if not used_index: + try: + r_total = conn.execute( + f"SELECT COUNT(1) FROM {qt} " + f"WHERE {ts_expr} >= ? AND {ts_expr} < ? " + "AND real_sender_id = ? AND CAST(local_type AS INTEGER) != 10000", + (start_ts, end_ts, my_rowid), + ).fetchone() + total_sent_messages += int((r_total[0] if r_total else 0) or 0) + except Exception: + pass + + try: + rows_text = conn.execute( + f"SELECT message_content, compress_content FROM {qt} " + f"WHERE {ts_expr} >= ? AND {ts_expr} < ? " + "AND real_sender_id = ? 
AND CAST(local_type AS INTEGER) = 1", + (start_ts, end_ts, my_rowid), + ).fetchall() + except Exception: + rows_text = [] + for rt in rows_text: + try: + txt = _decode_message_content(rt["compress_content"], rt["message_content"]).strip() + except Exception: + txt = "" + if not txt: + continue + txt_norm = _normalize_index_text_for_emoji_match(txt) + if emoji_regex is not None and txt_norm: + for m in emoji_regex.finditer(txt_norm): + nk = str(m.group(0) or "") + k = emoji_norm_to_key.get(nk) or nk + if k: + text_emoji_counts[k] += 1 + for u in _extract_unicode_emoji_tokens(txt_norm): + if u: + unicode_emoji_counts[u] += 1 + + try: + packed_info_expr = "packed_info_data" if _has_packed_info_data_column(conn, qt) else "NULL AS packed_info_data" + rows_emoji = conn.execute( + f"SELECT server_id, local_id, create_time, message_content, compress_content, {packed_info_expr} " + f"FROM {qt} " + f"WHERE {ts_expr} >= ? AND {ts_expr} < ? " + "AND real_sender_id = ? AND CAST(local_type AS INTEGER) = 47", + (start_ts, end_ts, my_rowid), + ).fetchall() + except Exception: + rows_emoji = [] + + for r in rows_emoji: + ts, sticker_key, emoji_url = _extract_sticker_from_row( + row=r, + username=username, + record_maps=True, + count_wechat_builtin=True, + ) + + if not used_index: + sent_sticker_count += 1 + if ts > 0: + dt = datetime.fromtimestamp(ts) + sticker_active_days.add(dt.strftime("%Y-%m-%d")) + hour_counts[dt.hour] += 1 + sticker_by_username[username] += 1 + weekday_counts[dt.weekday()] += 1 + + if not sticker_key: + continue + + sticker_key_counts[sticker_key] += 1 + prev_first_ts = sticker_key_first_ts_in_year.get(sticker_key) + if ts > 0 and (prev_first_ts is None or ts < prev_first_ts): + sticker_key_first_ts_in_year[sticker_key] = ts + if emoji_url and (sticker_key not in sticker_url_map): + sticker_url_map[sticker_key] = emoji_url + if username and (sticker_key not in sticker_sample_username): + sticker_sample_username[sticker_key] = username + if username: + 
sticker_key_username_counts[sticker_key][username] += 1 + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + + sticker_keys_in_year = set(sticker_key_counts.keys()) + sticker_key_last_ts_before_year: dict[str, int] = {} + if sticker_keys_in_year and my_username: + for db_path in db_paths: + if not db_path.exists(): + continue + + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + conn.text_factory = bytes + + my_rowid: Optional[int] = None + try: + r2 = conn.execute( + "SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1", + (my_username,), + ).fetchone() + if r2 is not None and r2[0] is not None: + my_rowid = int(r2[0]) + except Exception: + my_rowid = None + if my_rowid is None: + continue + + tables = _list_message_tables(conn) + for table_name in tables: + username = resolve_username_from_table(table_name) + qt = _quote_ident(table_name) + packed_info_expr = ( + "packed_info_data" if _has_packed_info_data_column(conn, qt) else "NULL AS packed_info_data" + ) + try: + rows_hist = conn.execute( + f"SELECT server_id, local_id, create_time, message_content, compress_content, {packed_info_expr} " + f"FROM {qt} " + f"WHERE {ts_expr} < ? " + "AND real_sender_id = ? 
AND CAST(local_type AS INTEGER) = 47", + (start_ts, my_rowid), + ) + except Exception: + rows_hist = [] + + for r in rows_hist: + ts, sticker_key, _ = _extract_sticker_from_row( + row=r, + username=username, + record_maps=False, + count_wechat_builtin=False, + ) + if (not sticker_key) or (sticker_key not in sticker_keys_in_year) or ts <= 0: + continue + prev_ts = sticker_key_last_ts_before_year.get(sticker_key) + if prev_ts is None or ts > prev_ts: + sticker_key_last_ts_before_year[sticker_key] = ts + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + + if resource_conn is not None: + try: + resource_conn.close() + except Exception: + pass + + # Prefer index total when available, but keep non-negative relationship. + if used_index: + sent_sticker_count = max(int(sent_sticker_count), int(sum(sticker_key_counts.values())), int(sent_sticker_count)) + + sent_sticker_count = int(sent_sticker_count) + sticker_days = int(len(sticker_active_days)) + sticker_per_day = (float(sent_sticker_count) / float(sticker_days)) if sticker_days > 0 else 0.0 + sticker_share = (float(sent_sticker_count) / float(total_sent_messages)) if total_sent_messages > 0 else 0.0 + unique_sticker_type_count = int(len(sticker_key_counts)) + revive_gap_days_threshold = 60 + new_sticker_count_this_year = 0 + revived_sticker_count = 0 + revived_max_gap_days = 0 + new_sticker_keys_in_year: set[str] = set() + revived_sticker_keys_in_year: set[str] = set() + revived_gap_days_by_key: dict[str, int] = {} + for sticker_key, first_ts in sticker_key_first_ts_in_year.items(): + if first_ts <= 0: + continue + prev_ts = sticker_key_last_ts_before_year.get(sticker_key) + if prev_ts is None or prev_ts <= 0: + new_sticker_count_this_year += 1 + new_sticker_keys_in_year.add(sticker_key) + continue + gap_days = int(max(0, (int(first_ts) - int(prev_ts))) // 86400) + if gap_days >= revive_gap_days_threshold: + revived_sticker_count += 1 + revived_sticker_keys_in_year.add(sticker_key) + 
revived_gap_days_by_key[sticker_key] = int(gap_days) + if gap_days > revived_max_gap_days: + revived_max_gap_days = gap_days + new_sticker_share = ( + float(new_sticker_count_this_year) / float(unique_sticker_type_count) + if unique_sticker_type_count > 0 + else 0.0 + ) + revived_sticker_share = ( + float(revived_sticker_count) / float(unique_sticker_type_count) + if unique_sticker_type_count > 0 + else 0.0 + ) + + peak_hour: Optional[int] = None + if hour_counts: + peak_hour = max(range(24), key=lambda h: (int(hour_counts.get(h, 0)), -h)) + + peak_weekday: Optional[int] = None + if weekday_counts: + peak_weekday = max(range(7), key=lambda w: (int(weekday_counts.get(w, 0)), -w)) + peak_weekday_name = _weekday_name_zh(peak_weekday if peak_weekday is not None else -1) + + def pick_sticker_owner_username(sticker_key: str) -> str: + counts = sticker_key_username_counts.get(sticker_key) + if counts: + try: + return sorted(counts.items(), key=lambda kv: (-int(kv[1]), str(kv[0])))[0][0] + except Exception: + pass + return str(sticker_sample_username.get(sticker_key) or "") + + top_stickers_raw = sorted(sticker_key_counts.items(), key=lambda kv: (-int(kv[1]), str(kv[0])))[:6] + new_sticker_samples_raw = sorted( + [ + (k, int(sticker_key_counts.get(k, 0))) + for k in new_sticker_keys_in_year + if int(sticker_key_counts.get(k, 0)) > 0 + ], + key=lambda kv: (-int(kv[1]), str(kv[0])), + )[:4] + revived_sticker_samples_raw = sorted( + [ + (k, int(sticker_key_counts.get(k, 0))) + for k in revived_sticker_keys_in_year + if int(sticker_key_counts.get(k, 0)) > 0 + ], + key=lambda kv: (-int(kv[1]), str(kv[0])), + )[:4] + + sample_sticker_keys = [k for k, _ in top_stickers_raw + new_sticker_samples_raw + revived_sticker_samples_raw] + sample_usernames = [pick_sticker_owner_username(key) for key in sample_sticker_keys] + sample_contact_rows = _load_contact_rows( + account_dir / "contact.db", + [u for u in sample_usernames if u], + ) + + def build_sticker_stat_item(key: str, cnt: int) 
-> dict[str, Any]: + md5 = str(sticker_key_md5.get(key) or "") + expr_id = int(sticker_key_expr_id.get(key) or 0) + sample_username = pick_sticker_owner_username(key) + remote_url = str(sticker_url_map.get(key) or "") + sample_row = sample_contact_rows.get(sample_username) if sample_username else None + sample_display = _pick_display_name(sample_row, sample_username) if sample_username else "" + sample_avatar_url = _build_avatar_url(str(account_dir.name or ""), sample_username) if sample_username else "" + expr_asset = str(expression_id_to_asset.get(expr_id) or "") if expr_id > 0 else "" + expr_label = str(expression_id_to_label.get(expr_id) or "") if expr_id > 0 else "" + local_url = ( + _build_local_emoji_url( + account_name=str(account_dir.name or ""), + md5=str(md5), + username=sample_username, + emoji_remote_url=remote_url, + ) + if md5 + else (f"/wxemoji/{expr_asset}" if expr_asset else "") + ) + ratio = (float(cnt) / float(sent_sticker_count)) if sent_sticker_count > 0 else 0.0 + return { + "md5": str(md5 or key), + "count": int(cnt), + "ratio": float(ratio), + "emojiUrl": local_url, + "emojiRemoteUrl": remote_url, + "emojiId": int(expr_id) if expr_id > 0 else None, + "emojiAssetPath": f"/wxemoji/{expr_asset}" if expr_asset else "", + "emojiLabel": expr_label, + "sampleUsername": sample_username, + "sampleDisplayName": sample_display, + "sampleAvatarUrl": sample_avatar_url, + } + + top_stickers: list[dict[str, Any]] = [build_sticker_stat_item(key, cnt) for key, cnt in top_stickers_raw] + new_sticker_samples: list[dict[str, Any]] = [ + build_sticker_stat_item(key, cnt) for key, cnt in new_sticker_samples_raw + ] + revived_sticker_samples: list[dict[str, Any]] = [] + for key, cnt in revived_sticker_samples_raw: + item = build_sticker_stat_item(key, cnt) + item["gapDays"] = int(revived_gap_days_by_key.get(key) or 0) + revived_sticker_samples.append(item) + + top_wechat_emojis_raw = sorted(wechat_emoji_counts.items(), key=lambda kv: (-int(kv[1]), 
int(kv[0])))[:8] + top_wechat_emojis: list[dict[str, Any]] = [] + for expr_id, cnt in top_wechat_emojis_raw: + expr_asset = str(expression_id_to_asset.get(int(expr_id)) or "") + expr_label = str(expression_id_to_label.get(int(expr_id)) or f"[表情{int(expr_id)}]") + top_wechat_emojis.append( + { + "id": int(expr_id), + "key": expr_label, + "count": int(cnt), + "assetPath": f"/wxemoji/{expr_asset}" if expr_asset else "", + } + ) + + top_text_emojis_raw = sorted(text_emoji_counts.items(), key=lambda kv: (-int(kv[1]), str(kv[0])))[:6] + top_text_emojis: list[dict[str, Any]] = [] + for key, cnt in top_text_emojis_raw: + asset = str(emoji_table.get(key) or "") + top_text_emojis.append( + { + "key": str(key), + "count": int(cnt), + "assetPath": f"/wxemoji/{asset}" if asset else "", + } + ) + + top_unicode_emojis_raw = sorted(unicode_emoji_counts.items(), key=lambda kv: (-int(kv[1]), str(kv[0])))[:8] + top_unicode_emojis: list[dict[str, Any]] = [] + for key, cnt in top_unicode_emojis_raw: + top_unicode_emojis.append({"emoji": str(key), "count": int(cnt)}) + + top_battle_partner_obj: dict[str, Any] = { + "username": "", + "displayName": "", + "maskedName": "", + "avatarUrl": "", + "stickerCount": 0, + } + battle_candidates = [ + (u, c) + for u, c in sticker_by_username.items() + if u + and (not u.endswith("@chatroom")) + and _should_keep_session(u, include_official=False) + and int(c) > 0 + ] + if battle_candidates: + top_u, top_cnt = sorted(battle_candidates, key=lambda kv: (-int(kv[1]), str(kv[0])))[0] + rows = _load_contact_rows(account_dir / "contact.db", [top_u]) + row = rows.get(top_u) + display = _pick_display_name(row, top_u) + top_battle_partner_obj = { + "username": top_u, + "displayName": display, + "maskedName": display, + "avatarUrl": _build_avatar_url(str(account_dir.name or ""), top_u), + "stickerCount": int(top_cnt), + } + + top_text = top_text_emojis[0] if top_text_emojis else None + top_wechat = top_wechat_emojis[0] if top_wechat_emojis else None + persona = 
_pick_persona( + sent_sticker_count=sent_sticker_count, + sticker_share=float(sticker_share), + peak_hour=peak_hour, + top_text_emoji_count=int((top_text.get("count") if top_text else 0) or 0) + + int((top_wechat.get("count") if top_wechat else 0) or 0), + ) + + lines: list[str] = [] + if sent_sticker_count > 0: + lines.append( + f"这一年,你用 {sent_sticker_count:,} 张表情包把聊天变得更有温度;在 {sticker_days:,} 个活跃日里,日均 {sticker_per_day:.1f} 张。" + ) + else: + lines.append("这一年你几乎没发过表情包。") + + if peak_hour is not None and peak_weekday_name: + lines.append(f"你最活跃的时刻是 {peak_weekday_name} {peak_hour}:00。") + + if top_stickers: + top0 = top_stickers[0] + label0 = str(top0.get("emojiLabel") or "") + if label0: + lines.append(f"年度 C 位表情是 {label0}({int(top0['count']):,} 次)。") + else: + lines.append(f"年度 C 位表情是 {top0['md5'][:8]}…({int(top0['count']):,} 次)。") + + if top_wechat: + lines.append(f"你最常用的小黄脸是 {top_wechat['key']},共 {int(top_wechat['count']):,} 次。") + elif top_text: + lines.append(f"在文字聊天里,你最常打的小黄脸是 {top_text['key']},共 {int(top_text['count']):,} 次。") + if top_unicode_emojis: + lines.append(f"普通 Emoji 最常用 {top_unicode_emojis[0]['emoji']},共 {int(top_unicode_emojis[0]['count']):,} 次。") + + if int(top_battle_partner_obj.get("stickerCount") or 0) > 0: + lines.append( + f"和你斗图最狠的是 {top_battle_partner_obj['displayName']}({int(top_battle_partner_obj['stickerCount']):,} 发)。" + ) + + lines.append(f"年度人格:{persona['label']}。") + + return { + "year": int(year), + "sentStickerCount": int(sent_sticker_count), + "stickerActiveDays": int(sticker_days), + "stickerPerActiveDay": float(sticker_per_day), + "stickerShareOfSentMessages": float(sticker_share), + "uniqueStickerTypeCount": int(unique_sticker_type_count), + "newStickerCountThisYear": int(new_sticker_count_this_year), + "newStickerShare": float(new_sticker_share), + "newStickerSamples": new_sticker_samples, + "revivedStickerCount": int(revived_sticker_count), + "revivedStickerShare": float(revived_sticker_share), + "revivedMinGapDays": 
def build_card_04_emoji_universe(*, account_dir: Path, year: int) -> dict[str, Any]:
    """Build the Wrapped card #4 payload ("emoji universe") for one account/year.

    The statistics come from ``compute_emoji_universe_stats``; this function
    only derives a one-paragraph narrative from them and wraps everything in
    the card envelope (id/title/scope/category/status/kind/narrative/data).
    """
    stats = compute_emoji_universe_stats(account_dir=account_dir, year=year)

    def _count_of(item: dict[str, Any]) -> int:
        # Missing/None counts are rendered as 0.
        return int(item.get("count") or 0)

    sticker_total = int(stats.get("sentStickerCount") or 0)
    active_days = int(stats.get("stickerActiveDays") or 0)
    per_active_day = float(stats.get("stickerPerActiveDay") or 0.0)
    stickers = list(stats.get("topStickers") or [])
    wechat_emojis = list(stats.get("topWechatEmojis") or [])
    text_emojis = list(stats.get("topTextEmojis") or [])
    unicode_emojis = list(stats.get("topUnicodeEmojis") or [])
    weekday_name = str(stats.get("peakWeekdayName") or "")
    hour = stats.get("peakHour")

    nothing_used = sticker_total <= 0 and not (wechat_emojis or text_emojis or unicode_emojis)
    if nothing_used:
        narrative = "今年你几乎没用表情表达。"
    else:
        sentences: list[str] = []
        if sticker_total > 0:
            sentences.append(
                f"这一年,你用 {sticker_total:,} 张表情包把聊天变得更有温度;在 {active_days:,} 个活跃日里,日均 {per_active_day:.1f} 张。"
            )
        if hour is not None and weekday_name:
            sentences.append(f"你最活跃的时刻是 {weekday_name} {int(hour)}:00。")

        # Highlights are joined into a single closing sentence.
        highlights: list[str] = []
        if stickers:
            best = stickers[0]
            best_label = str(best.get("emojiLabel") or "").strip()
            if best_label:
                highlights.append(f"年度 C 位表情是 {best_label}({_count_of(best):,} 次)")
            else:
                # No label available: fall back to the first 8 chars of the md5/key.
                highlights.append(f"年度 C 位表情是 {str(best.get('md5') or '')[:8]}…({_count_of(best):,} 次)")

        # Built-in WeChat emoji takes precedence over text-typed emoji.
        if wechat_emojis:
            best = wechat_emojis[0]
            highlights.append(f"你最常用的小黄脸是 {str(best.get('key') or '')}({_count_of(best):,} 次)")
        elif text_emojis:
            best = text_emojis[0]
            highlights.append(f"在文字聊天里,你最常打的小黄脸是 {str(best.get('key') or '')}({_count_of(best):,} 次)")

        if unicode_emojis:
            best = unicode_emojis[0]
            highlights.append(f"普通 Emoji 最常用 {str(best.get('emoji') or '')}({_count_of(best):,} 次)")

        if highlights:
            sentences.append(",".join(highlights) + "。")
        narrative = "".join(sentences)

    card: dict[str, Any] = {
        "id": 4,
        "title": "这一年,你的表情包里藏了多少心情?",
        "scope": "global",
        "category": "B",
        "status": "ok",
        "kind": "emoji/annual_universe",
        "narrative": narrative,
        "data": stats,
    }
    return card
@@ -58,6 +59,13 @@ _WRAPPED_CARD_MANIFEST: tuple[dict[str, Any], ...] = ( "category": "B", "kind": "chat/reply_speed", }, + { + "id": 4, + "title": "这一年,你的表情包里藏了多少心情?", + "scope": "global", + "category": "B", + "kind": "emoji/annual_universe", + }, ) _WRAPPED_CARD_ID_SET = {int(c["id"]) for c in _WRAPPED_CARD_MANIFEST} @@ -274,7 +282,7 @@ def build_wrapped_annual_response( ) -> dict[str, Any]: """Build annual wrapped response for the given account/year. - For now we implement cards up to id=3 (plus a meta overview card id=0). + For now we implement cards up to id=4 (plus a meta overview card id=0). """ account_dir = _resolve_account_dir(account) @@ -317,6 +325,8 @@ def build_wrapped_annual_response( cards.append(build_card_02_message_chars(account_dir=account_dir, year=y)) # Page 5: reply speed / best chat buddy. cards.append(build_card_03_reply_speed(account_dir=account_dir, year=y)) + # Page 6: annual emoji universe / meme almanac. + cards.append(build_card_04_emoji_universe(account_dir=account_dir, year=y)) obj: dict[str, Any] = { "account": account_dir.name, @@ -508,6 +518,8 @@ def build_wrapped_annual_card( card = build_card_02_message_chars(account_dir=account_dir, year=y) elif cid == 3: card = build_card_03_reply_speed(account_dir=account_dir, year=y) + elif cid == 4: + card = build_card_04_emoji_universe(account_dir=account_dir, year=y) else: # Should be unreachable due to _WRAPPED_CARD_ID_SET check. 
raise ValueError(f"Unknown Wrapped card id: {cid}") diff --git a/tests/test_wrapped_emoji_universe.py b/tests/test_wrapped_emoji_universe.py new file mode 100644 index 0000000..4aed5c3 --- /dev/null +++ b/tests/test_wrapped_emoji_universe.py @@ -0,0 +1,773 @@ +import hashlib +import sqlite3 +import sys +import unittest +from datetime import datetime +from pathlib import Path +from tempfile import TemporaryDirectory + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + + +class TestWrappedEmojiUniverse(unittest.TestCase): + def _ts(self, y: int, m: int, d: int, h: int = 0, mi: int = 0, s: int = 0) -> int: + return int(datetime(y, m, d, h, mi, s).timestamp()) + + def _seed_contact_db(self, path: Path, *, account: str, usernames: list[str]) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE contact ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE stranger ( + username TEXT, + remark TEXT, + nick_name TEXT, + alias TEXT, + local_type INTEGER, + verify_flag INTEGER, + big_head_url TEXT, + small_head_url TEXT + ) + """ + ) + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (account, "", "我", "", 1, 0, "", ""), + ) + for idx, username in enumerate(usernames): + conn.execute( + "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (username, "", f"好友{idx + 1}", "", 1, 0, "", ""), + ) + conn.commit() + finally: + conn.close() + + def _seed_session_db(self, path: Path, *, usernames: list[str]) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE SessionTable ( + username TEXT, + is_hidden INTEGER, + sort_timestamp INTEGER + ) + """ + ) + for username in usernames: + conn.execute("INSERT INTO SessionTable VALUES (?, ?, ?)", (username, 0, 1735689600)) + conn.commit() + finally: + 
conn.close() + + def _seed_message_db( + self, + path: Path, + *, + account: str, + username: str, + rows: list[dict[str, object]], + ) -> None: + table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}" + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)") + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account)) + conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username)) + conn.execute( + f""" + CREATE TABLE {table_name} ( + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + sort_seq INTEGER, + real_sender_id INTEGER, + create_time INTEGER, + message_content TEXT, + compress_content BLOB, + packed_info_data BLOB + ) + """ + ) + for row in rows: + conn.execute( + f""" + INSERT INTO {table_name} + (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content, packed_info_data) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + int(row.get("local_id", 0)), + int(row.get("server_id", 0)), + int(row.get("local_type", 0)), + int(row.get("sort_seq", row.get("local_id", 0))), + int(row.get("real_sender_id", 1)), + int(row.get("create_time", 0)), + str(row.get("message_content", "")), + row.get("compress_content"), + row.get("packed_info_data"), + ), + ) + conn.commit() + finally: + conn.close() + + def _seed_index_db(self, path: Path, *, rows: list[dict[str, object]]) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute( + """ + CREATE TABLE message_fts ( + text TEXT, + username TEXT, + render_type TEXT, + create_time INTEGER, + sort_seq INTEGER, + local_id INTEGER, + server_id INTEGER, + local_type INTEGER, + db_stem TEXT, + table_name TEXT, + sender_username TEXT, + is_hidden INTEGER, + is_official INTEGER + ) + """ + ) + for row in rows: + conn.execute( + """ + INSERT INTO message_fts ( + text, username, render_type, create_time, sort_seq, local_id, server_id, local_type, + db_stem, table_name, sender_username, is_hidden, is_official + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + str(row.get("text", "")), + str(row.get("username", "")), + str(row.get("render_type", "")), + int(row.get("create_time", 0)), + int(row.get("sort_seq", 0)), + int(row.get("local_id", 0)), + int(row.get("server_id", 0)), + int(row.get("local_type", 0)), + str(row.get("db_stem", "message_0")), + str(row.get("table_name", "")), + str(row.get("sender_username", "")), + int(row.get("is_hidden", 0)), + int(row.get("is_official", 0)), + ), + ) + conn.commit() + finally: + conn.close() + + def _seed_resource_db( + self, + path: Path, + *, + username: str, + md5: str, + server_id: int, + local_id: int, + create_time: int, + ) -> None: + conn = sqlite3.connect(str(path)) + try: + conn.execute("CREATE TABLE ChatName2Id (user_name TEXT)") + conn.execute("INSERT INTO ChatName2Id (rowid, user_name) VALUES (?, ?)", (7, username)) + conn.execute( + """ + CREATE TABLE MessageResourceInfo ( + message_id INTEGER PRIMARY KEY AUTOINCREMENT, + message_svr_id INTEGER, + chat_id INTEGER, + message_local_type INTEGER, + packed_info BLOB, + message_local_id INTEGER, + message_create_time INTEGER + ) + """ + ) + packed = f"/tmp/{md5}.dat".encode("utf-8") + conn.execute( + """ + INSERT INTO MessageResourceInfo + (message_svr_id, chat_id, message_local_type, packed_info, message_local_id, message_create_time) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + (int(server_id), 7, 47, packed, int(local_id), int(create_time)), + ) + conn.commit() + finally: + conn.close() + + def test_only_sticker_messages_outputs_core_stats(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_a" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + md5_a = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + md5_b = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + rows = [ + { + "local_id": 1, + "server_id": 1001, + "local_type": 47, + "create_time": self._ts(2025, 1, 1, 10, 5, 0), + "message_content": f'', + }, + { + "local_id": 2, + "server_id": 1002, + "local_type": 47, + "create_time": self._ts(2025, 1, 1, 10, 30, 0), + "message_content": f'', + }, + { + "local_id": 3, + "server_id": 1003, + "local_type": 47, + "create_time": self._ts(2025, 1, 2, 22, 10, 0), + "message_content": f'', + }, + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + table_name = f"msg_{hashlib.md5(friend.encode('utf-8')).hexdigest()}" + fts_rows = [] + for row in rows: + fts_rows.append( + { + "text": "[表情]", + "username": friend, + "render_type": "emoji", + "create_time": row["create_time"], + "sort_seq": row["local_id"], + "local_id": row["local_id"], + "server_id": row["server_id"], + "local_type": 47, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + } + ) + self._seed_index_db(account_dir / "chat_search_index.db", rows=fts_rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertTrue(data["settings"]["usedIndex"]) + self.assertEqual(data["sentStickerCount"], 3) + self.assertEqual(data["peakHour"], 10) + 
self.assertIsNotNone(data["peakWeekday"]) + self.assertEqual(data["topBattlePartner"]["username"], friend) + self.assertEqual(data["topBattlePartner"]["stickerCount"], 3) + self.assertEqual(data["topBattlePartner"]["maskedName"], data["topBattlePartner"]["displayName"]) + self.assertEqual(data["topStickers"][0]["md5"], md5_a) + self.assertEqual(data["topStickers"][0]["count"], 2) + self.assertTrue(str(data["topStickers"][0].get("sampleDisplayName") or "").strip()) + self.assertTrue(str(data["topStickers"][0].get("sampleAvatarUrl") or "").startswith("/api/chat/avatar")) + + def test_fallback_to_resource_md5_when_xml_missing(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_b" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + ts = self._ts(2025, 3, 8, 21, 0, 0) + rows = [ + { + "local_id": 11, + "server_id": 220011, + "local_type": 47, + "create_time": ts, + "message_content": '', + } + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + md5_fallback = "cccccccccccccccccccccccccccccccc" + self._seed_resource_db( + account_dir / "message_resource.db", + username=friend, + md5=md5_fallback, + server_id=220011, + local_id=11, + create_time=ts, + ) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertFalse(data["settings"]["usedIndex"]) + self.assertEqual(data["sentStickerCount"], 1) + self.assertEqual(data["topStickers"][0]["md5"], md5_fallback) + self.assertEqual(data["topStickers"][0]["count"], 1) + + def test_text_emoji_mapping_from_wechat_emojis_ts(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import 
compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_c" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + table_name = f"msg_{hashlib.md5(friend.encode('utf-8')).hexdigest()}" + fts_rows = [ + { + "text": "早上好[微笑][微笑]🙂🙂", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 4, 1, 9, 0, 0), + "local_id": 1, + "server_id": 901, + "local_type": 1, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + }, + { + "text": "晚上见[微笑][发呆]😂", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 4, 1, 22, 0, 0), + "local_id": 2, + "server_id": 902, + "local_type": 1, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + }, + ] + self._seed_index_db(account_dir / "chat_search_index.db", rows=fts_rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + self.assertTrue(data["settings"]["usedIndex"]) + self.assertGreaterEqual(len(data["topTextEmojis"]), 1) + self.assertEqual(data["topTextEmojis"][0]["key"], "[微笑]") + self.assertEqual(data["topTextEmojis"][0]["count"], 3) + self.assertTrue(data["topTextEmojis"][0]["assetPath"].endswith("Expression_1@2x.png")) + self.assertGreaterEqual(len(data["topUnicodeEmojis"]), 1) + self.assertEqual(data["topUnicodeEmojis"][0]["emoji"], "🙂") + self.assertEqual(data["topUnicodeEmojis"][0]["count"], 2) + + def test_wechat_builtin_emoji_from_packed_info_data(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_e" + account_dir = root / account + account_dir.mkdir(parents=True, 
exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + # packed_info_data protobuf varints: + # 08 04 => field#1=4 + # 10 33 => field#2=51 (Expression_51@2x) + rows = [ + { + "local_id": 1, + "server_id": 501, + "local_type": 47, + "create_time": self._ts(2025, 7, 1, 10, 0, 0), + "message_content": "binary_emoji_payload_a", + "packed_info_data": bytes.fromhex("08041033"), + }, + { + "local_id": 2, + "server_id": 502, + "local_type": 47, + "create_time": self._ts(2025, 7, 1, 10, 1, 0), + "message_content": "binary_emoji_payload_b", + "packed_info_data": bytes.fromhex("08041033"), + }, + { + "local_id": 3, + "server_id": 503, + "local_type": 47, + "create_time": self._ts(2025, 7, 1, 11, 0, 0), + "message_content": "binary_emoji_payload_c", + "packed_info_data": bytes.fromhex("0804104a"), + }, + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertFalse(data["settings"]["usedIndex"]) + self.assertEqual(data["sentStickerCount"], 3) + self.assertGreaterEqual(len(data["topWechatEmojis"]), 1) + self.assertEqual(data["topWechatEmojis"][0]["id"], 51) + self.assertEqual(data["topWechatEmojis"][0]["count"], 2) + self.assertTrue(data["topWechatEmojis"][0]["assetPath"].endswith("Expression_51@2x.png")) + self.assertGreaterEqual(len(data["topStickers"]), 1) + self.assertEqual(data["topStickers"][0]["emojiId"], 51) + self.assertEqual(data["topStickers"][0]["count"], 2) + self.assertTrue(str(data["topStickers"][0].get("emojiAssetPath") or "").endswith("Expression_51@2x.png")) + + def test_index_counts_only_sent_messages(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = 
"wxid_friend_sent_only" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + rows = [ + { + "text": "[ 微 笑 ]", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 6, 2, 9, 0, 0), + "local_id": 101, + "server_id": 4001, + "local_type": 1, + "table_name": "msg_dummy", + "sender_username": account, + }, + { + "text": "[ 发 呆 ]", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 6, 2, 9, 1, 0), + "local_id": 102, + "server_id": 4002, + "local_type": 1, + "table_name": "msg_dummy", + "sender_username": friend, + }, + { + "text": "[表情]", + "username": friend, + "render_type": "emoji", + "create_time": self._ts(2025, 6, 2, 9, 2, 0), + "local_id": 201, + "server_id": 5001, + "local_type": 47, + "table_name": "msg_dummy", + "sender_username": account, + }, + { + "text": "[表情]", + "username": friend, + "render_type": "emoji", + "create_time": self._ts(2025, 6, 2, 9, 3, 0), + "local_id": 202, + "server_id": 5002, + "local_type": 47, + "table_name": "msg_dummy", + "sender_username": friend, + }, + ] + self._seed_index_db(account_dir / "chat_search_index.db", rows=rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + self.assertTrue(data["settings"]["usedIndex"]) + + self.assertEqual(data["sentStickerCount"], 1) + + keys = {x.get("key") for x in data.get("topTextEmojis") or []} + self.assertIn("[微笑]", keys) + self.assertNotIn("[发呆]", keys) + + def test_raw_db_counts_only_sent_messages(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_raw_dir" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + 
self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + rows = [ + { + "local_id": 1, + "server_id": 1001, + "local_type": 1, + "real_sender_id": 1, + "create_time": self._ts(2025, 7, 1, 8, 0, 0), + "message_content": "/::B", + }, + { + "local_id": 2, + "server_id": 1002, + "local_type": 1, + "real_sender_id": 2, + "create_time": self._ts(2025, 7, 1, 8, 1, 0), + "message_content": "/::B", + }, + { + "local_id": 3, + "server_id": 1101, + "local_type": 47, + "real_sender_id": 1, + "create_time": self._ts(2025, 7, 1, 9, 0, 0), + "message_content": "binary_emoji_payload_a", + "packed_info_data": bytes.fromhex("08031033"), + }, + { + "local_id": 4, + "server_id": 1102, + "local_type": 47, + "real_sender_id": 2, + "create_time": self._ts(2025, 7, 1, 9, 1, 0), + "message_content": "binary_emoji_payload_b", + "packed_info_data": bytes.fromhex("08031033"), + }, + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertFalse(data["settings"]["usedIndex"]) + self.assertEqual(data["sentStickerCount"], 1) + self.assertEqual(data["topWechatEmojis"][0]["id"], 51) + self.assertEqual(data["topWechatEmojis"][0]["count"], 1) + + self.assertGreaterEqual(len(data["topTextEmojis"]), 1) + self.assertEqual(data["topTextEmojis"][0]["key"], "[色]") + self.assertEqual(data["topTextEmojis"][0]["count"], 1) + self.assertTrue(data["topTextEmojis"][0]["assetPath"].endswith("Expression_3@2x.png")) + + def test_new_and_revived_sticker_metrics(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_new_revived" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + 
self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + md5_revived = "dddddddddddddddddddddddddddddddd" + md5_recent = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" + md5_new = "ffffffffffffffffffffffffffffffff" + rows = [ + { + "local_id": 1, + "server_id": 5001, + "local_type": 47, + "create_time": self._ts(2024, 1, 1, 9, 0, 0), + "message_content": f'', + }, + { + "local_id": 2, + "server_id": 5002, + "local_type": 47, + "create_time": self._ts(2024, 12, 28, 10, 0, 0), + "message_content": f'', + }, + { + "local_id": 3, + "server_id": 5003, + "local_type": 47, + "create_time": self._ts(2025, 1, 5, 11, 0, 0), + "message_content": f'', + }, + { + "local_id": 4, + "server_id": 5004, + "local_type": 47, + "create_time": self._ts(2025, 3, 15, 12, 0, 0), + "message_content": f'', + }, + { + "local_id": 5, + "server_id": 5005, + "local_type": 47, + "create_time": self._ts(2025, 5, 10, 13, 0, 0), + "message_content": f'', + }, + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertEqual(data["sentStickerCount"], 3) + self.assertEqual(data["uniqueStickerTypeCount"], 3) + self.assertEqual(data["newStickerCountThisYear"], 1) + self.assertEqual(data["revivedStickerCount"], 1) + self.assertEqual(data["revivedMinGapDays"], 60) + self.assertGreaterEqual(int(data.get("revivedMaxGapDays") or 0), 400) + new_samples = list(data.get("newStickerSamples") or []) + revived_samples = list(data.get("revivedStickerSamples") or []) + self.assertTrue(any(str(x.get("md5") or "") == md5_new for x in new_samples)) + self.assertTrue(any(str(x.get("md5") or "") == md5_revived for x in revived_samples)) + revived_item = next((x for x in revived_samples if str(x.get("md5") or "") == md5_revived), {}) + self.assertGreaterEqual(int(revived_item.get("gapDays") or 0), 
400) + + def test_empty_year_returns_safe_empty_state(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import build_card_04_emoji_universe + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[]) + self._seed_session_db(account_dir / "session.db", usernames=[]) + + card = build_card_04_emoji_universe(account_dir=account_dir, year=2025) + self.assertEqual(card["id"], 4) + self.assertEqual(card["status"], "ok") + self.assertEqual(card["data"]["sentStickerCount"], 0) + self.assertIn("几乎没用表情表达", card["narrative"]) + self.assertIsInstance(card["data"]["lines"], list) + self.assertGreaterEqual(len(card["data"]["lines"]), 1) + self.assertEqual(card["data"].get("topUnicodeEmojis"), []) + + def test_tie_break_is_stable_by_key(self): + from wechat_decrypt_tool.wrapped.cards.card_04_emoji_universe import compute_emoji_universe_stats + + with TemporaryDirectory() as td: + root = Path(td) + account = "wxid_me" + friend = "wxid_friend_d" + account_dir = root / account + account_dir.mkdir(parents=True, exist_ok=True) + + self._seed_contact_db(account_dir / "contact.db", account=account, usernames=[friend]) + self._seed_session_db(account_dir / "session.db", usernames=[friend]) + + md5_a = "11111111111111111111111111111111" + md5_b = "22222222222222222222222222222222" + rows = [ + { + "local_id": 1, + "server_id": 301, + "local_type": 47, + "create_time": self._ts(2025, 6, 1, 8, 0, 0), + "message_content": f'', + }, + { + "local_id": 2, + "server_id": 302, + "local_type": 47, + "create_time": self._ts(2025, 6, 1, 8, 1, 0), + "message_content": f'', + }, + { + "local_id": 3, + "server_id": 303, + "local_type": 47, + "create_time": self._ts(2025, 6, 1, 8, 2, 0), + "message_content": f'', + }, + { + "local_id": 4, + "server_id": 304, + "local_type": 47, + "create_time": 
self._ts(2025, 6, 1, 8, 3, 0), + "message_content": f'', + }, + ] + self._seed_message_db(account_dir / "message_0.db", account=account, username=friend, rows=rows) + + table_name = f"msg_{hashlib.md5(friend.encode('utf-8')).hexdigest()}" + fts_rows = [] + for row in rows: + fts_rows.append( + { + "text": "[表情]", + "username": friend, + "render_type": "emoji", + "create_time": row["create_time"], + "local_id": row["local_id"], + "server_id": row["server_id"], + "local_type": 47, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + } + ) + fts_rows.extend( + [ + { + # `chat_search_index` stores text as char-tokens: "[微笑][发呆]" -> "[ 微 笑 ] [ 发 呆 ]" + "text": "[ 微 笑 ] [ 发 呆 ]", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 6, 2, 9, 0, 0), + "local_id": 101, + "server_id": 4001, + "local_type": 1, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + }, + { + "text": "[ 发 呆 ] [ 微 笑 ]", + "username": friend, + "render_type": "text", + "create_time": self._ts(2025, 6, 2, 9, 1, 0), + "local_id": 102, + "server_id": 4002, + "local_type": 1, + "db_stem": "message_0", + "table_name": table_name, + "sender_username": account, + }, + ] + ) + self._seed_index_db(account_dir / "chat_search_index.db", rows=fts_rows) + + data = compute_emoji_universe_stats(account_dir=account_dir, year=2025) + + self.assertEqual(data["topStickers"][0]["md5"], md5_a) + expected_emoji_key = sorted(["[微笑]", "[发呆]"])[0] + self.assertEqual(data["topTextEmojis"][0]["key"], expected_emoji_key) + + +if __name__ == "__main__": + unittest.main() From 03f27a30ee8b211bb1846378aa0ff3c412889533 Mon Sep 17 00:00:00 2001 From: 2977094657 <2977094657@qq.com> Date: Fri, 13 Feb 2026 22:41:02 +0800 Subject: [PATCH 3/3] =?UTF-8?q?improvement(app-shell):=20=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E4=BE=A7=E8=BE=B9=E6=A0=8F=E6=98=BE=E7=A4=BA=E8=A7=84?= =?UTF-8?q?=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit - 首页与解密流程相关页面隐藏侧边栏,Wrapped 路由判断更完整 --- frontend/app.vue | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/frontend/app.vue b/frontend/app.vue index 6ee997a..be1176f 100644 --- a/frontend/app.vue +++ b/frontend/app.vue @@ -57,7 +57,12 @@ const contentClass = computed(() => : 'flex-1 overflow-auto min-h-0' ) -const showSidebar = computed(() => !String(route.path || '').startsWith('/wrapped')) +const showSidebar = computed(() => { + const path = String(route.path || '') + if (path === '/') return false + if (path === '/decrypt' || path === '/detection-result' || path === '/decrypt-result') return false + return !(path === '/wrapped' || path.startsWith('/wrapped/')) +})