diff --git a/src/wechat_decrypt_tool/routers/chat.py b/src/wechat_decrypt_tool/routers/chat.py index 6cbdc5a..b55db16 100644 --- a/src/wechat_decrypt_tool/routers/chat.py +++ b/src/wechat_decrypt_tool/routers/chat.py @@ -73,6 +73,18 @@ def _normalize_session_type(value: Optional[str]) -> Optional[str]: raise HTTPException(status_code=400, detail="Invalid session_type, use 'group' or 'single'.") +def _normalize_render_type_key(value: Any) -> str: + v = str(value or "").strip() + if not v: + return "" + if v == "redPacket": + return "redpacket" + lower = v.lower() + if lower in {"redpacket", "red_packet", "red-packet", "redenvelope", "red_envelope"}: + return "redpacket" + return lower + + @router.get("/api/chat/search-index/status", summary="消息搜索索引状态") async def chat_search_index_status(account: Optional[str] = None): account_dir = _resolve_account_dir(account) @@ -1082,6 +1094,440 @@ async def list_chat_sessions( } +def _collect_chat_messages( + *, + username: str, + account_dir: Path, + db_paths: list[Path], + resource_conn: Optional[sqlite3.Connection], + resource_chat_id: Optional[int], + take: int, + want_types: Optional[set[str]], +) -> tuple[list[dict[str, Any]], bool, list[str], list[str], set[str]]: + is_group = bool(username.endswith("@chatroom")) + take = int(take) + if take < 0: + take = 0 + take_probe = take + 1 + + merged: list[dict[str, Any]] = [] + sender_usernames: list[str] = [] + quote_usernames: list[str] = [] + pat_usernames: set[str] = set() + has_more_any = False + + for db_path in db_paths: + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + try: + table_name = _resolve_msg_table_name(conn, username) + if not table_name: + continue + + my_wxid = account_dir.name + my_rowid = None + try: + r = conn.execute( + "SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1", + (my_wxid,), + ).fetchone() + if r is not None: + my_rowid = int(r[0]) + except Exception: + my_rowid = None + + quoted_table = _quote_ident(table_name) + sql_with_join = ( + "SELECT " + "m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, " + "m.message_content, m.compress_content, n.user_name AS sender_username " + f"FROM {quoted_table} m " + "LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid " + "ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC " + "LIMIT ?" + ) + sql_no_join = ( + "SELECT " + "m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, " + "m.message_content, m.compress_content, '' AS sender_username " + f"FROM {quoted_table} m " + "ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC " + "LIMIT ?" + ) + + # Force sqlite3 to return TEXT as raw bytes for this query, so we can zstd-decompress + # compress_content reliably. + conn.text_factory = bytes + + try: + rows = conn.execute(sql_with_join, (take_probe,)).fetchall() + except Exception: + rows = conn.execute(sql_no_join, (take_probe,)).fetchall() + if len(rows) > take: + has_more_any = True + rows = rows[:take] + + for r in rows: + local_id = int(r["local_id"] or 0) + create_time = int(r["create_time"] or 0) + sort_seq = int(r["sort_seq"] or 0) if r["sort_seq"] is not None else 0 + local_type = int(r["local_type"] or 0) + sender_username = _decode_sqlite_text(r["sender_username"]).strip() + + is_sent = False + if my_rowid is not None: + try: + is_sent = int(r["real_sender_id"] or 0) == int(my_rowid) + except Exception: + is_sent = False + + raw_text = _decode_message_content(r["compress_content"], r["message_content"]) + raw_text = raw_text.strip() + + sender_prefix = "" + if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'): + sender_prefix, raw_text = _split_group_sender_prefix(raw_text) + + if is_group and sender_prefix: + sender_username = sender_prefix + + if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')): + xml_sender = _extract_sender_from_group_xml(raw_text) + if xml_sender: + sender_username = xml_sender + + if is_sent: + sender_username = account_dir.name + elif (not is_group) and (not sender_username): + sender_username = username + + render_type = "text" + content_text = raw_text + title = "" + url = "" + image_md5 = "" + emoji_md5 = "" + emoji_url = "" + thumb_url = "" + image_url = "" + image_file_id = "" + video_md5 = "" + video_thumb_md5 = "" + video_file_id = "" + video_thumb_file_id = "" + video_url = "" + video_thumb_url = "" + voice_length = "" + quote_username = "" + quote_title = "" + quote_content = "" + quote_server_id = "" + quote_type = "" + quote_voice_length = "" + amount = "" + cover_url = "" + file_size = "" + pay_sub_type = "" + transfer_status = "" + file_md5 = "" + transfer_id = "" + voip_type = "" + + if local_type == 10000: + render_type = "system" + if "revokemsg" in raw_text: + content_text = "撤回了一条消息" + else: + import re + + content_text = re.sub(r"]*>", "", raw_text) + content_text = re.sub(r"\s+", " ", content_text).strip() or "[系统消息]" + elif local_type == 49: + parsed = _parse_app_message(raw_text) + render_type = str(parsed.get("renderType") or "text") + content_text = str(parsed.get("content") or "") + title = str(parsed.get("title") or "") + url = str(parsed.get("url") or "") + quote_title = str(parsed.get("quoteTitle") or "") + quote_content = str(parsed.get("quoteContent") or "") + quote_username = str(parsed.get("quoteUsername") or "") + quote_server_id = str(parsed.get("quoteServerId") or "") + quote_type = str(parsed.get("quoteType") or "") + quote_voice_length = str(parsed.get("quoteVoiceLength") or "") + amount = str(parsed.get("amount") or "") + cover_url = str(parsed.get("coverUrl") or "") + thumb_url = str(parsed.get("thumbUrl") or "") + file_size = str(parsed.get("size") or "") + pay_sub_type = str(parsed.get("paySubType") or "") + file_md5 = str(parsed.get("fileMd5") or "") + transfer_id = str(parsed.get("transferId") or "") + + if render_type == "transfer": + # 直接从原始 XML 提取 transferid(可能在 wcpayinfo 内) + if not transfer_id: + transfer_id = _extract_xml_tag_or_attr(raw_text, "transferid") or "" + transfer_status = _infer_transfer_status_text( + is_sent=is_sent, + paysubtype=pay_sub_type, + receivestatus=str(parsed.get("receiveStatus") or ""), + sendertitle=str(parsed.get("senderTitle") or ""), + receivertitle=str(parsed.get("receiverTitle") or ""), + senderdes=str(parsed.get("senderDes") or ""), + receiverdes=str(parsed.get("receiverDes") or ""), + ) + if not content_text: + content_text = transfer_status or "转账" + elif local_type == 266287972401: + render_type = "system" + template = _extract_xml_tag_text(raw_text, "template") + if template: + import re + + pat_usernames.update({m.group(1) for m in re.finditer(r"\$\{([^}]+)\}", template) if m.group(1)}) + content_text = "[拍一拍]" + else: + content_text = "[拍一拍]" + elif local_type == 244813135921: + render_type = "quote" + parsed = _parse_app_message(raw_text) + content_text = str(parsed.get("content") or "[引用消息]") + quote_title = str(parsed.get("quoteTitle") or "") + quote_content = str(parsed.get("quoteContent") or "") + quote_username = str(parsed.get("quoteUsername") or "") + quote_server_id = str(parsed.get("quoteServerId") or "") + quote_type = str(parsed.get("quoteType") or "") + quote_voice_length = str(parsed.get("quoteVoiceLength") or "") + elif local_type == 3: + render_type = "image" + # 先尝试从 XML 中提取 md5(不同版本字段可能不同) + image_md5 = _extract_xml_attr(raw_text, "md5") or _extract_xml_tag_text(raw_text, "md5") + if not image_md5: + for k in [ + "cdnthumbmd5", + "cdnthumd5", + "cdnmidimgmd5", + "cdnbigimgmd5", + "hdmd5", + "hevc_mid_md5", + "hevc_md5", + "imgmd5", + "filemd5", + ]: + image_md5 = _extract_xml_attr(raw_text, k) or _extract_xml_tag_text(raw_text, k) + if image_md5: + break + + # Extract CDN URL (some versions store a non-HTTP "file id" string here) + _cdn_url_or_id = ( + _extract_xml_attr(raw_text, "cdnthumburl") + or _extract_xml_attr(raw_text, "cdnthumurl") + or _extract_xml_attr(raw_text, "cdnmidimgurl") + or _extract_xml_attr(raw_text, "cdnbigimgurl") + or _extract_xml_tag_text(raw_text, "cdnthumburl") + or _extract_xml_tag_text(raw_text, "cdnthumurl") + or _extract_xml_tag_text(raw_text, "cdnmidimgurl") + or _extract_xml_tag_text(raw_text, "cdnbigimgurl") + ) + _cdn_url_or_id = str(_cdn_url_or_id or "").strip() + image_url = _cdn_url_or_id if _cdn_url_or_id.startswith(("http://", "https://")) else "" + if (not image_url) and _cdn_url_or_id: + image_file_id = _cdn_url_or_id + + if (not image_md5) and resource_conn is not None: + image_md5 = _lookup_resource_md5( + resource_conn, + resource_chat_id, + message_local_type=local_type, + server_id=int(r["server_id"] or 0), + local_id=local_id, + create_time=create_time, + ) + content_text = "[图片]" + elif local_type == 34: + render_type = "voice" + duration = _extract_xml_attr(raw_text, "voicelength") + voice_length = duration + content_text = f"[语音 {duration}秒]" if duration else "[语音]" + elif local_type == 43 or local_type == 62: + render_type = "video" + video_md5 = _extract_xml_attr(raw_text, "md5") + video_thumb_md5 = _extract_xml_attr(raw_text, "cdnthumbmd5") + video_thumb_url_or_id = _extract_xml_attr(raw_text, "cdnthumburl") or _extract_xml_tag_text( + raw_text, "cdnthumburl" + ) + video_url_or_id = _extract_xml_attr(raw_text, "cdnvideourl") or _extract_xml_tag_text( + raw_text, "cdnvideourl" + ) + + video_thumb_url = ( + video_thumb_url_or_id + if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://")) + else "" + ) + video_url = ( + video_url_or_id + if str(video_url_or_id or "").strip().lower().startswith(("http://", "https://")) + else "" + ) + video_thumb_file_id = "" if video_thumb_url else (str(video_thumb_url_or_id or "").strip() or "") + video_file_id = "" if video_url else (str(video_url_or_id or "").strip() or "") + if (not video_thumb_md5) and resource_conn is not None: + video_thumb_md5 = _lookup_resource_md5( + resource_conn, + resource_chat_id, + message_local_type=local_type, + server_id=int(r["server_id"] or 0), + local_id=local_id, + create_time=create_time, + ) + content_text = "[视频]" + elif local_type == 47: + render_type = "emoji" + emoji_md5 = _extract_xml_attr(raw_text, "md5") + if not emoji_md5: + emoji_md5 = _extract_xml_tag_text(raw_text, "md5") + emoji_url = _extract_xml_attr(raw_text, "cdnurl") + if not emoji_url: + emoji_url = _extract_xml_tag_text(raw_text, "cdn_url") + if (not emoji_md5) and resource_conn is not None: + emoji_md5 = _lookup_resource_md5( + resource_conn, + resource_chat_id, + message_local_type=local_type, + server_id=int(r["server_id"] or 0), + local_id=local_id, + create_time=create_time, + ) + content_text = "[表情]" + elif local_type == 50: + render_type = "voip" + try: + import re + + block = raw_text + m_voip = re.search( + r"(]*>.*?)", + raw_text, + flags=re.IGNORECASE | re.DOTALL, + ) + if m_voip: + block = m_voip.group(1) or raw_text + room_type = str(_extract_xml_tag_text(block, "room_type") or "").strip() + if room_type == "0": + voip_type = "video" + elif room_type == "1": + voip_type = "audio" + + voip_msg = str(_extract_xml_tag_text(block, "msg") or "").strip() + content_text = voip_msg or "通话" + except Exception: + content_text = "通话" + elif local_type != 1: + if not content_text: + content_text = _infer_message_brief_by_local_type(local_type) + else: + if content_text.startswith("<") or content_text.startswith('"<'): + if "= (int(offset) + int(limit))) or (not has_more_any): + break + + next_take = scan_take * 2 if scan_take > 0 else (int(limit) + int(offset)) + if next_take <= scan_take: + break + scan_take = next_take + + r""" take = int(limit) + int(offset) take_probe = take + 1 merged: list[dict[str, Any]] = [] @@ -1547,6 +2042,7 @@ async def list_chat_messages( finally: conn.close() + """ if resource_conn is not None: try: resource_conn.close()