Merge branch 'main' into feat/sns-media

2026-02-20 14:50:50 +08:00 · 2026-02-14 00:27:52 +08:00
parent 0a47b4d3be 03f27a30ee
commit 6af78d1e8e
25 changed files with 9077 additions and 86 deletions
--- a/src/wechat_decrypt_tool/chat_export_service.py
+++ b/src/wechat_decrypt_tool/chat_export_service.py
--- a/src/wechat_decrypt_tool/chat_helpers.py
+++ b/src/wechat_decrypt_tool/chat_helpers.py
@@ -8,7 +8,7 @@ from collections import Counter
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
-from urllib.parse import quote, urlparse
+from urllib.parse import parse_qs, quote, urlparse

 from fastapi import HTTPException

@@ -634,6 +634,32 @@ def _is_mp_weixin_article_url(url: str) -> bool:
    return "mp.weixin.qq.com/" in lu


+def _is_mp_weixin_feed_article_url(url: str) -> bool:
+    """Return True when `url` looks like a WeChat PC feed/recommendation article link.
+
+    Such mp.weixin.qq.com share links usually carry an `exptype` query value such as
+    `masonry_feed_brief_content_elite_for_pcfeeds_u2i`. WeChat desktop tends to show
+    them as a cover card (image + bottom title), so this is used as a hint for
+    choosing the 'cover' linkStyle.
+    """
+
+    text = str(url or "").strip()
+    if text == "":
+        return False
+
+    # Structured check first: any `exptype` query value mentioning "masonry_feed".
+    try:
+        exptypes = parse_qs(urlparse(text).query or "").get("exptype") or []
+        if any("masonry_feed" in str(item or "").lower() for item in exptypes):
+            return True
+    except Exception:
+        # Best effort: an unparsable URL falls through to the substring check below.
+        pass
+
+    # Fallback: case-insensitive raw substring match on the whole URL.
+    return "exptype=masonry_feed" in text.lower()
+
+
 def _classify_link_share(*, app_type: int, url: str, source_username: str, desc: str) -> tuple[str, str]:
    src = str(source_username or "").strip().lower()
    is_official_article = bool(
@@ -647,7 +673,15 @@ def _classify_link_share(*, app_type: int, url: str, source_username: str, desc:
    hashtag_count = len(re.findall(r"#[^#\s]+", d))

    # 公众号文章中「封面图 + 底栏标题」卡片特征：摘要以 #话题# 风格为主。
-    link_style = "cover" if (is_official_article and (d.startswith("#") or hashtag_count >= 2)) else "default"
+    cover_like = bool(
+        is_official_article
+        and (
+            d.startswith("#")
+            or hashtag_count >= 2
+            or _is_mp_weixin_feed_article_url(url)
+        )
+    )
+    link_style = "cover" if cover_like else "default"
    return link_type, link_style


@@ -948,8 +982,12 @@ def _parse_app_message(text: str) -> dict[str, Any]:
            "recordItem": record_item or "",
        }

-    if app_type in (5, 68) and url:
-        thumb_url = _normalize_xml_url(_extract_xml_tag_text(text, "thumburl"))
+    if app_type in (4, 5, 68) and url:
+        # Many appmsg link cards (notably Bilibili shares with <type>4</type>) include a <patMsg> metadata block.
+        # DO NOT treat "<patmsg" presence as a pat message: it would misclassify normal link cards as "[拍一拍]".
+        thumb_url = _normalize_xml_url(
+            _extract_xml_tag_text(text, "thumburl") or _extract_xml_tag_text(text, "cdnthumburl")
+        )
        link_type, link_style = _classify_link_share(
            app_type=app_type,
            url=url,
@@ -1093,7 +1131,10 @@ def _parse_app_message(text: str) -> dict[str, Any]:
            "quoteVoiceLength": quote_voice_length,
        }

-    if app_type == 62 or "<patmsg" in lower or 'type="patmsg"' in lower or "type='patmsg'" in lower:
+    # Some versions may mark pat messages via sysmsg/appmsg tag attribute: <sysmsg type="patmsg">...</sysmsg>.
+    # Be strict here: lots of non-pat appmsg payloads still carry a nested <patMsg>...</patMsg> metadata block.
+    patmsg_attr = bool(re.search(r"<(sysmsg|appmsg)\b[^>]*\btype=['\"]patmsg['\"]", lower))
+    if app_type == 62 or patmsg_attr:
        return {"renderType": "system", "content": "[拍一拍]"}

    if app_type == 2000 or (
--- a/src/wechat_decrypt_tool/routers/chat.py
+++ b/src/wechat_decrypt_tool/routers/chat.py
@@ -2742,6 +2742,90 @@ def _postprocess_transfer_messages(merged: list[dict[str, Any]]) -> None:
    # - 将原始转账消息（1/8）回填为“已被接收”
    # - 若同一 transferId 同时存在原始消息与 paysubtype=3 消息，则将 paysubtype=3 的那条校正为“已收款”

+    def _is_transfer_expired_system_message(text: Any) -> bool:
+        """Heuristically detect a system notice about a transfer that expired unreceived."""
+        notice = str(text or "").strip()
+        # Every accepted notice mentions both "转账" (transfer) and "过期" (expired).
+        if not notice or "转账" not in notice or "过期" not in notice:
+            return False
+        overdue_24h = "未接收" in notice and ("24小时" in notice or "二十四小时" in notice)
+        # NOTE(review): "转账" is guaranteed by the guard above, so the last clause is always true.
+        return overdue_24h or ("已过期" in notice and ("收款方" in notice or "转账" in notice))
+
+    def _mark_pending_transfers_expired_by_system_messages() -> set[str]:
+        """Mark pending transfers in `merged` as expired using expiry system notices.
+
+        Each expiry notice (a "system" message accepted by
+        `_is_transfer_expired_system_message`) is paired with at most one pending
+        transfer (renderType "transfer", paySubType "1" or "8") sent 22h-26h before
+        it, preferring the one closest to 24h. A paired transfer is rewritten in
+        place: paySubType becomes "10" and transferStatus becomes "已过期".
+
+        Returns:
+            The non-empty transferId values of the transfers that were marked.
+        """
+
+        hour = 3600
+
+        def _create_time(message: dict[str, Any]) -> int:
+            # Missing or unparsable createTime counts as 0, which callers discard.
+            try:
+                return int(message.get("createTime") or 0)
+            except Exception:
+                return 0
+
+        notice_times: list[int] = []
+        pending: list[tuple[int, int]] = []  # (index in merged, createTime)
+        for position, message in enumerate(merged):
+            kind = str(message.get("renderType") or "").strip()
+            if kind == "system":
+                if _is_transfer_expired_system_message(message.get("content")):
+                    stamp = _create_time(message)
+                    if stamp > 0:
+                        notice_times.append(stamp)
+            elif kind == "transfer":
+                if str(message.get("paySubType") or "").strip() in ("1", "8"):
+                    stamp = _create_time(message)
+                    if stamp > 0:
+                        pending.append((position, stamp))
+
+        if not (notice_times and pending):
+            return set()
+
+        claimed: set[int] = set()
+        expired_ids: set[str] = set()
+
+        # Expiry notices usually appear about 24 hours after the transfer was sent.
+        # To avoid false matches, only a gap within [22h, 26h] qualifies, and the
+        # earliest-listed candidate with the gap closest to 24h wins.
+        for notice_ts in sorted(notice_times):
+            chosen = -1
+            smallest_gap = 10**9
+            for position, sent_ts in pending:
+                if position in claimed:
+                    continue
+                elapsed = notice_ts - sent_ts
+                if not (22 * hour <= elapsed <= 26 * hour):
+                    continue
+                gap = abs(elapsed - 24 * hour)
+                if gap < smallest_gap:
+                    smallest_gap, chosen = gap, position
+
+            if chosen < 0:
+                continue
+
+            claimed.add(chosen)
+            target = merged[chosen]
+            target["paySubType"] = "10"
+            target["transferStatus"] = "已过期"
+            transfer_id = str(target.get("transferId") or "").strip()
+            if transfer_id:
+                expired_ids.add(transfer_id)
+
+        return expired_ids
+
+    expired_transfer_ids = _mark_pending_transfers_expired_by_system_messages()
+
    returned_transfer_ids: set[str] = set()  # 退还状态的 transferId
    received_transfer_ids: set[str] = set()  # 已收款状态的 transferId
    returned_amounts_with_time: list[tuple[str, int]] = []  # (金额, 时间戳) 用于退还回退匹配
@@ -2828,6 +2912,8 @@ def _postprocess_transfer_messages(merged: list[dict[str, Any]]) -> None:
        tid = str(m.get("transferId") or "").strip()
        if not tid or tid not in pending_transfer_ids:
            continue
+        if tid in expired_transfer_ids:
+            continue
        mid = str(m.get("id") or "").strip()
        if mid and mid in backfilled_message_ids:
            continue
--- a/src/wechat_decrypt_tool/routers/chat_export.py
+++ b/src/wechat_decrypt_tool/routers/chat_export.py
@@ -12,17 +12,31 @@ from ..path_fix import PathFixRoute

 router = APIRouter(route_class=PathFixRoute)

-ExportFormat = Literal["json", "txt"]
+ExportFormat = Literal["json", "txt", "html"]
 ExportScope = Literal["selected", "all", "groups", "singles"]
 MediaKind = Literal["image", "emoji", "video", "video_thumb", "voice", "file"]
-MessageType = Literal["text", "image", "emoji", "video", "voice", "file", "link", "transfer", "redPacket", "system", "quote", "voip"]
+MessageType = Literal[
+    "text",
+    "image",
+    "emoji",
+    "video",
+    "voice",
+    "chatHistory",
+    "file",
+    "link",
+    "transfer",
+    "redPacket",
+    "system",
+    "quote",
+    "voip",
+]


 class ChatExportCreateRequest(BaseModel):
    account: Optional[str] = Field(None, description="账号目录名（可选，默认使用第一个）")
    scope: ExportScope = Field("selected", description="导出范围：selected=指定会话；all=全部；groups=仅群聊；singles=仅单聊")
    usernames: list[str] = Field(default_factory=list, description="会话 username 列表（scope=selected 时使用）")
-    format: ExportFormat = Field("json", description="导出格式：json 或 txt（zip 内每个会话一个文件）")
+    format: ExportFormat = Field("json", description="导出格式：json/txt/html（zip 内每个会话一个文件；html 可离线打开 index.html 查看）")
    start_time: Optional[int] = Field(None, description="起始时间（Unix 秒，含）")
    end_time: Optional[int] = Field(None, description="结束时间（Unix 秒，含）")
    include_hidden: bool = Field(False, description="是否包含隐藏会话（scope!=selected 时）")
@@ -41,6 +55,10 @@ class ChatExportCreateRequest(BaseModel):
        False,
        description="预留字段：本项目不从微信进程提取媒体密钥，请使用 wx_key 获取并保存/批量解密",
    )
+    download_remote_media: bool = Field(
+        False,
+        description="HTML 导出时允许联网下载链接/引用缩略图等远程媒体（提高离线完整性）",
+    )
    privacy_mode: bool = Field(
        False,
        description="隐私模式导出：隐藏会话/用户名/内容，不打包头像与媒体",
@@ -64,6 +82,7 @@ async def create_chat_export(req: ChatExportCreateRequest):
        message_types=req.message_types,
        output_dir=req.output_dir,
        allow_process_key_extract=req.allow_process_key_extract,
+        download_remote_media=req.download_remote_media,
        privacy_mode=req.privacy_mode,
        file_name=req.file_name,
    )
--- a/src/wechat_decrypt_tool/wrapped/cards/card_04_emoji_universe.py
+++ b/src/wechat_decrypt_tool/wrapped/cards/card_04_emoji_universe.py
--- a/src/wechat_decrypt_tool/wrapped/service.py
+++ b/src/wechat_decrypt_tool/wrapped/service.py
@@ -16,15 +16,16 @@ from .cards.card_00_global_overview import build_card_00_global_overview
 from .cards.card_01_cyber_schedule import WeekdayHourHeatmap, build_card_01_cyber_schedule, compute_weekday_hour_heatmap
 from .cards.card_02_message_chars import build_card_02_message_chars
 from .cards.card_03_reply_speed import build_card_03_reply_speed
+from .cards.card_04_emoji_universe import build_card_04_emoji_universe

 logger = get_logger(__name__)


 # We use this number to version the cache filename so adding more cards won't accidentally serve
 # an older partial cache.
-_IMPLEMENTED_UPTO_ID = 3
+_IMPLEMENTED_UPTO_ID = 4
 # Bump this when we change card payloads/ordering while keeping the same implemented_upto.
-_CACHE_VERSION = 9
+_CACHE_VERSION = 15


 # "Manifest" is used by the frontend to render the deck quickly, then lazily fetch each card.
@@ -58,6 +59,13 @@ _WRAPPED_CARD_MANIFEST: tuple[dict[str, Any], ...] = (
        "category": "B",
        "kind": "chat/reply_speed",
    },
+    {
+        "id": 4,
+        "title": "这一年，你的表情包里藏了多少心情？",
+        "scope": "global",
+        "category": "B",
+        "kind": "emoji/annual_universe",
+    },
 )
 _WRAPPED_CARD_ID_SET = {int(c["id"]) for c in _WRAPPED_CARD_MANIFEST}

@@ -274,7 +282,7 @@ def build_wrapped_annual_response(
 ) -> dict[str, Any]:
    """Build annual wrapped response for the given account/year.

-    For now we implement cards up to id=3 (plus a meta overview card id=0).
+    For now we implement cards up to id=4 (plus a meta overview card id=0).
    """

    account_dir = _resolve_account_dir(account)
@@ -317,6 +325,8 @@ def build_wrapped_annual_response(
    cards.append(build_card_02_message_chars(account_dir=account_dir, year=y))
    # Page 5: reply speed / best chat buddy.
    cards.append(build_card_03_reply_speed(account_dir=account_dir, year=y))
+    # Page 6: annual emoji universe / meme almanac.
+    cards.append(build_card_04_emoji_universe(account_dir=account_dir, year=y))

    obj: dict[str, Any] = {
        "account": account_dir.name,
@@ -508,6 +518,8 @@ def build_wrapped_annual_card(
            card = build_card_02_message_chars(account_dir=account_dir, year=y)
        elif cid == 3:
            card = build_card_03_reply_speed(account_dir=account_dir, year=y)
+        elif cid == 4:
+            card = build_card_04_emoji_universe(account_dir=account_dir, year=y)
        else:
            # Should be unreachable due to _WRAPPED_CARD_ID_SET check.
            raise ValueError(f"Unknown Wrapped card id: {cid}")