Files
WeChatDataAnalysis/src/wechat_decrypt_tool/routers/chat.py
2977094657 0349d89def fix(chat): 图片消息优先使用资源 md5 修复 _t.dat 缩略图不显示
- 图片消息(local_type=3)优先使用 message_resource.db 的资源 md5,避免 XML md5 与本地 *_t.dat 基名不一致导致 /api/chat/media/image 404

- 同步覆盖消息合并/导出/列表三处解析路径

refs #12
2025-12-31 11:34:28 +08:00

3293 lines
136 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import re
import sqlite3
from pathlib import Path
from typing import Any, Optional
from urllib.parse import quote
from fastapi import APIRouter, HTTPException, Request
from ..logging_config import get_logger
from ..chat_search_index import (
get_chat_search_index_db_path,
get_chat_search_index_status,
start_chat_search_index_build,
)
from ..chat_helpers import (
_build_avatar_url,
_build_fts_query,
_decode_message_content,
_decode_sqlite_text,
_extract_sender_from_group_xml,
_extract_xml_attr,
_extract_xml_tag_or_attr,
_extract_xml_tag_text,
_format_session_time,
_infer_last_message_brief,
_infer_message_brief_by_local_type,
_infer_transfer_status_text,
_iter_message_db_paths,
_list_decrypted_accounts,
_make_search_tokens,
_make_snippet,
_match_tokens,
_load_contact_rows,
_load_latest_message_previews,
_lookup_resource_md5,
_normalize_xml_url,
_parse_app_message,
_parse_pat_message,
_pick_avatar_url,
_pick_display_name,
_query_head_image_usernames,
_quote_ident,
_resolve_account_dir,
_resolve_msg_table_name,
_row_to_search_hit,
_resource_lookup_chat_id,
_should_keep_session,
_split_group_sender_prefix,
_to_char_token_text,
)
from ..media_helpers import _try_find_decrypted_resource
from ..path_fix import PathFixRoute
from ..session_last_message import (
build_session_last_message_table,
get_session_last_message_status,
load_session_last_messages,
)
logger = get_logger(__name__)
_DEBUG_SESSIONS = os.environ.get("WECHAT_TOOL_DEBUG_SESSIONS", "0") == "1"
router = APIRouter(route_class=PathFixRoute)
def _normalize_session_type(value: Optional[str]) -> Optional[str]:
v = str(value or "").strip().lower()
if not v or v in {"all", "any", "none", "null", "0"}:
return None
if v in {"group", "groups", "chatroom", "chatrooms"}:
return "group"
if v in {"single", "singles", "person", "people", "user", "users", "contact", "contacts"}:
return "single"
raise HTTPException(status_code=400, detail="Invalid session_type, use 'group' or 'single'.")
def _normalize_render_type_key(value: Any) -> str:
v = str(value or "").strip()
if not v:
return ""
if v == "redPacket":
return "redpacket"
lower = v.lower()
if lower in {"redpacket", "red_packet", "red-packet", "redenvelope", "red_envelope"}:
return "redpacket"
return lower
@router.get("/api/chat/search-index/status", summary="消息搜索索引状态")
async def chat_search_index_status(account: Optional[str] = None):
account_dir = _resolve_account_dir(account)
return get_chat_search_index_status(account_dir)
@router.post("/api/chat/search-index/build", summary="构建/重建消息搜索索引")
async def chat_search_index_build(account: Optional[str] = None, rebuild: bool = False):
account_dir = _resolve_account_dir(account)
return start_chat_search_index_build(account_dir, rebuild=bool(rebuild))
@router.get("/api/chat/session-last-message/status", summary="会话最后一条消息缓存表状态")
async def session_last_message_status(account: Optional[str] = None):
account_dir = _resolve_account_dir(account)
return get_session_last_message_status(account_dir)
@router.post("/api/chat/session-last-message/build", summary="构建/重建会话最后一条消息缓存表")
async def session_last_message_build(
account: Optional[str] = None,
rebuild: bool = False,
include_hidden: bool = True,
include_official: bool = True,
):
account_dir = _resolve_account_dir(account)
return build_session_last_message_table(
account_dir,
rebuild=bool(rebuild),
include_hidden=bool(include_hidden),
include_official=bool(include_official),
)
@router.get("/api/chat/search-index/senders", summary="消息搜索索引发送者列表")
async def chat_search_index_senders(
account: Optional[str] = None,
username: Optional[str] = None,
session_type: Optional[str] = None,
message_q: Optional[str] = None,
limit: int = 200,
q: Optional[str] = None,
start_time: Optional[int] = None,
end_time: Optional[int] = None,
render_types: Optional[str] = None,
include_hidden: bool = False,
include_official: bool = False,
):
if limit <= 0:
limit = 200
if limit > 2000:
limit = 2000
username = str(username or "").strip()
if not username:
username = None
session_type_norm = _normalize_session_type(session_type)
message_q = str(message_q or "").strip()
if not message_q:
message_q = None
q = str(q or "").strip()
if not q:
q = None
account_dir = _resolve_account_dir(account)
contact_db_path = account_dir / "contact.db"
index_status = get_chat_search_index_status(account_dir)
index = dict(index_status.get("index") or {})
build = dict(index.get("build") or {})
index_exists = bool(index.get("exists"))
index_ready = bool(index.get("ready"))
build_status = str(build.get("status") or "").strip()
if (not index_ready) and build_status not in {"building", "error"}:
start_chat_search_index_build(account_dir, rebuild=bool(index_exists))
index_status = get_chat_search_index_status(account_dir)
index = dict(index_status.get("index") or {})
build = dict(index.get("build") or {})
build_status = str(build.get("status") or "").strip()
index_exists = bool(index.get("exists"))
index_ready = bool(index.get("ready"))
if build_status == "error":
return {
"status": "index_error",
"account": account_dir.name,
"username": username,
"scope": "conversation" if username else "global",
"senders": [],
"index": index,
"message": str(build.get("error") or "Search index build failed."),
}
if not index_ready:
return {
"status": "index_building",
"account": account_dir.name,
"username": username,
"scope": "conversation" if username else "global",
"senders": [],
"index": index,
"message": "Search index is building. Please retry in a moment.",
}
if username is None and message_q is None:
return {
"status": "success",
"account": account_dir.name,
"username": None,
"scope": "global",
"senders": [],
"index": index,
"message": "Provide message_q to list global senders.",
}
index_db_path = get_chat_search_index_db_path(account_dir)
conn = sqlite3.connect(str(index_db_path))
conn.row_factory = sqlite3.Row
try:
where_parts: list[str] = ["sender_username <> ''"]
params: list[Any] = []
if message_q is not None:
fts_query = _build_fts_query(message_q)
if fts_query:
where_parts.insert(0, "message_fts MATCH ?")
params.append(fts_query)
if username is not None:
where_parts.append("username = ?")
params.append(username)
elif session_type_norm == "group":
where_parts.append("username LIKE ?")
params.append("%@chatroom")
elif session_type_norm == "single":
where_parts.append("username NOT LIKE ?")
params.append("%@chatroom")
if q is not None:
where_parts.append("sender_username LIKE ?")
params.append(f"%{q}%")
want_types: Optional[set[str]] = None
if render_types is not None:
parts = [p.strip() for p in str(render_types or "").split(",") if p.strip()]
want_types = {p for p in parts if p}
if not want_types:
want_types = None
if want_types is not None:
types_sorted = sorted(want_types)
placeholders = ",".join(["?"] * len(types_sorted))
where_parts.append(f"render_type IN ({placeholders})")
params.extend(types_sorted)
start_ts = int(start_time) if start_time is not None else None
end_ts = int(end_time) if end_time is not None else None
if start_ts is not None and start_ts < 0:
start_ts = 0
if end_ts is not None and end_ts < 0:
end_ts = 0
if start_ts is not None:
where_parts.append("CAST(create_time AS INTEGER) >= ?")
params.append(int(start_ts))
if end_ts is not None:
where_parts.append("CAST(create_time AS INTEGER) <= ?")
params.append(int(end_ts))
if not include_hidden:
where_parts.append("CAST(is_hidden AS INTEGER) = 0")
if not include_official:
where_parts.append("CAST(is_official AS INTEGER) = 0")
where_sql = " AND ".join(where_parts)
rows = conn.execute(
f"""
SELECT
sender_username AS sender_username,
COUNT(*) AS c
FROM message_fts
WHERE {where_sql}
GROUP BY sender_username
ORDER BY c DESC, sender_username ASC
LIMIT ?
""",
params + [int(limit)],
).fetchall()
finally:
conn.close()
sender_usernames = [str(r["sender_username"] or "").strip() for r in rows if r and r["sender_username"]]
sender_usernames = [u for u in sender_usernames if u]
contact_rows = _load_contact_rows(contact_db_path, sender_usernames)
head_image_db_path = account_dir / "head_image.db"
local_sender_avatars = _query_head_image_usernames(head_image_db_path, sender_usernames)
senders: list[dict[str, Any]] = []
for r in rows:
su = str(r["sender_username"] or "").strip()
if not su:
continue
cnt = int(r["c"] or 0)
row = contact_rows.get(su)
avatar_url = _pick_avatar_url(row)
if (not avatar_url) and (su in local_sender_avatars):
avatar_url = _build_avatar_url(account_dir.name, su)
senders.append(
{
"username": su,
"displayName": _pick_display_name(row, su) if row is not None else su,
"avatar": avatar_url,
"count": cnt,
}
)
return {
"status": "success",
"account": account_dir.name,
"username": username,
"scope": "conversation" if username else "global",
"senders": senders,
"index": index,
}
def _append_full_messages_from_rows(
*,
merged: list[dict[str, Any]],
sender_usernames: list[str],
quote_usernames: list[str],
pat_usernames: set[str],
rows: list[sqlite3.Row],
db_path: Path,
table_name: str,
username: str,
account_dir: Path,
is_group: bool,
my_rowid: Optional[int],
resource_conn: Optional[sqlite3.Connection],
resource_chat_id: Optional[int],
) -> None:
for r in rows:
local_id = int(r["local_id"] or 0)
create_time = int(r["create_time"] or 0)
sort_seq = int(r["sort_seq"] or 0) if r["sort_seq"] is not None else 0
local_type = int(r["local_type"] or 0)
sender_username = _decode_sqlite_text(r["sender_username"]).strip()
is_sent = False
if my_rowid is not None:
try:
is_sent = int(r["real_sender_id"] or 0) == int(my_rowid)
except Exception:
is_sent = False
raw_text = _decode_message_content(r["compress_content"], r["message_content"])
raw_text = raw_text.strip()
sender_prefix = ""
if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and sender_prefix:
sender_username = sender_prefix
if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')):
xml_sender = _extract_sender_from_group_xml(raw_text)
if xml_sender:
sender_username = xml_sender
if is_sent:
sender_username = account_dir.name
elif (not is_group) and (not sender_username):
sender_username = username
if sender_username:
sender_usernames.append(sender_username)
render_type = "text"
content_text = raw_text
title = ""
url = ""
image_md5 = ""
emoji_md5 = ""
emoji_url = ""
thumb_url = ""
image_url = ""
image_file_id = ""
video_md5 = ""
video_thumb_md5 = ""
video_file_id = ""
video_thumb_file_id = ""
video_url = ""
video_thumb_url = ""
voice_length = ""
quote_username = ""
quote_title = ""
quote_content = ""
quote_server_id = ""
quote_type = ""
quote_voice_length = ""
amount = ""
cover_url = ""
file_size = ""
pay_sub_type = ""
transfer_status = ""
file_md5 = ""
transfer_id = ""
voip_type = ""
if local_type == 10000:
render_type = "system"
if "revokemsg" in raw_text:
content_text = "撤回了一条消息"
else:
content_text = re.sub(r"</?[_a-zA-Z0-9]+[^>]*>", "", raw_text)
content_text = re.sub(r"\s+", " ", content_text).strip() or "[系统消息]"
elif local_type == 49:
parsed = _parse_app_message(raw_text)
render_type = str(parsed.get("renderType") or "text")
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
amount = str(parsed.get("amount") or "")
cover_url = str(parsed.get("coverUrl") or "")
thumb_url = str(parsed.get("thumbUrl") or "")
file_size = str(parsed.get("size") or "")
pay_sub_type = str(parsed.get("paySubType") or "")
file_md5 = str(parsed.get("fileMd5") or "")
transfer_id = str(parsed.get("transferId") or "")
if render_type == "transfer":
# 直接从原始 XML 提取 transferid可能在 wcpayinfo 内)
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(raw_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
elif local_type == 266287972401:
render_type = "system"
template = _extract_xml_tag_text(raw_text, "template")
if template:
pat_usernames.update(
{m.group(1) for m in re.finditer(r"\$\{([^}]+)\}", template) if m.group(1)}
)
content_text = "[拍一拍]"
else:
content_text = "[拍一拍]"
elif local_type == 244813135921:
render_type = "quote"
parsed = _parse_app_message(raw_text)
content_text = str(parsed.get("content") or "[引用消息]")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
elif local_type == 3:
render_type = "image"
# 先尝试从 XML 中提取 md5不同版本字段可能不同
image_md5 = _extract_xml_attr(raw_text, "md5") or _extract_xml_tag_text(raw_text, "md5")
if not image_md5:
for k in [
"cdnthumbmd5",
"cdnthumd5",
"cdnmidimgmd5",
"cdnbigimgmd5",
"hdmd5",
"hevc_mid_md5",
"hevc_md5",
"imgmd5",
"filemd5",
]:
image_md5 = _extract_xml_attr(raw_text, k) or _extract_xml_tag_text(raw_text, k)
if image_md5:
break
# Prefer message_resource.db md5 for local files: XML md5 frequently differs from the on-disk *.dat basename
# (especially for *_t.dat thumbnails), causing the media endpoint to 404.
if resource_conn is not None:
try:
resource_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
except Exception:
resource_md5 = ""
resource_md5 = str(resource_md5 or "").strip().lower()
if len(resource_md5) == 32 and all(c in "0123456789abcdef" for c in resource_md5):
image_md5 = resource_md5
# Extract CDN URL (some versions store a non-HTTP "file id" string here)
_cdn_url_or_id = (
_extract_xml_attr(raw_text, "cdnthumburl")
or _extract_xml_attr(raw_text, "cdnthumurl")
or _extract_xml_attr(raw_text, "cdnmidimgurl")
or _extract_xml_attr(raw_text, "cdnbigimgurl")
or _extract_xml_tag_text(raw_text, "cdnthumburl")
or _extract_xml_tag_text(raw_text, "cdnthumurl")
or _extract_xml_tag_text(raw_text, "cdnmidimgurl")
or _extract_xml_tag_text(raw_text, "cdnbigimgurl")
)
_cdn_url_or_id = _normalize_xml_url(_cdn_url_or_id)
image_url = (
_cdn_url_or_id if str(_cdn_url_or_id).lower().startswith(("http://", "https://")) else ""
)
if (not image_url) and _cdn_url_or_id:
image_file_id = _cdn_url_or_id
content_text = "[图片]"
elif local_type == 34:
render_type = "voice"
duration = _extract_xml_attr(raw_text, "voicelength")
voice_length = duration
content_text = f"[语音 {duration}秒]" if duration else "[语音]"
elif local_type == 43 or local_type == 62:
render_type = "video"
video_md5 = _extract_xml_attr(raw_text, "md5")
video_thumb_md5 = _extract_xml_attr(raw_text, "cdnthumbmd5")
video_thumb_url_or_id = _extract_xml_attr(raw_text, "cdnthumburl") or _extract_xml_tag_text(
raw_text, "cdnthumburl"
)
video_url_or_id = _extract_xml_attr(raw_text, "cdnvideourl") or _extract_xml_tag_text(
raw_text, "cdnvideourl"
)
video_thumb_url_or_id = _normalize_xml_url(video_thumb_url_or_id)
video_url_or_id = _normalize_xml_url(video_url_or_id)
video_thumb_url = (
video_thumb_url_or_id
if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_url = (
video_url_or_id
if str(video_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_thumb_file_id = "" if video_thumb_url else (str(video_thumb_url_or_id or "").strip() or "")
video_file_id = "" if video_url else (str(video_url_or_id or "").strip() or "")
if (not video_thumb_md5) and resource_conn is not None:
video_thumb_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[视频]"
elif local_type == 47:
render_type = "emoji"
emoji_md5 = _extract_xml_attr(raw_text, "md5")
if not emoji_md5:
emoji_md5 = _extract_xml_tag_text(raw_text, "md5")
emoji_url = _extract_xml_attr(raw_text, "cdnurl")
if not emoji_url:
emoji_url = _extract_xml_tag_text(raw_text, "cdn_url")
emoji_url = _normalize_xml_url(emoji_url)
if (not emoji_md5) and resource_conn is not None:
emoji_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[表情]"
elif local_type == 50:
render_type = "voip"
try:
block = raw_text
m_voip = re.search(
r"(<VoIPBubbleMsg[^>]*>.*?</VoIPBubbleMsg>)",
raw_text,
flags=re.IGNORECASE | re.DOTALL,
)
if m_voip:
block = m_voip.group(1) or raw_text
room_type = str(_extract_xml_tag_text(block, "room_type") or "").strip()
if room_type == "0":
voip_type = "video"
elif room_type == "1":
voip_type = "audio"
voip_msg = str(_extract_xml_tag_text(block, "msg") or "").strip()
content_text = voip_msg or "通话"
except Exception:
content_text = "通话"
elif local_type != 1:
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
else:
if content_text.startswith("<") or content_text.startswith('"<'):
if "<appmsg" in content_text.lower():
parsed = _parse_app_message(content_text)
rt = str(parsed.get("renderType") or "")
if rt and rt != "text":
render_type = rt
content_text = str(parsed.get("content") or content_text)
title = str(parsed.get("title") or title)
url = str(parsed.get("url") or url)
quote_title = str(parsed.get("quoteTitle") or quote_title)
quote_content = str(parsed.get("quoteContent") or quote_content)
amount = str(parsed.get("amount") or amount)
cover_url = str(parsed.get("coverUrl") or cover_url)
thumb_url = str(parsed.get("thumbUrl") or thumb_url)
file_size = str(parsed.get("size") or file_size)
pay_sub_type = str(parsed.get("paySubType") or pay_sub_type)
file_md5 = str(parsed.get("fileMd5") or file_md5)
transfer_id = str(parsed.get("transferId") or transfer_id)
if render_type == "transfer":
# 如果 transferId 仍为空,尝试从原始 XML 提取
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(content_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
t = _extract_xml_tag_text(content_text, "title")
d = _extract_xml_tag_text(content_text, "des")
content_text = t or d or _infer_message_brief_by_local_type(local_type)
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
if quote_username:
quote_usernames.append(str(quote_username).strip())
merged.append(
{
"id": f"{db_path.stem}:{table_name}:{local_id}",
"localId": local_id,
"serverId": int(r["server_id"] or 0),
"serverIdStr": str(int(r["server_id"] or 0)) if int(r["server_id"] or 0) else "",
"type": local_type,
"createTime": create_time,
"sortSeq": sort_seq,
"senderUsername": sender_username,
"isSent": bool(is_sent),
"renderType": render_type,
"content": content_text,
"title": title,
"url": url,
"imageMd5": image_md5,
"imageFileId": image_file_id,
"emojiMd5": emoji_md5,
"emojiUrl": emoji_url,
"thumbUrl": thumb_url,
"imageUrl": image_url,
"videoMd5": video_md5,
"videoThumbMd5": video_thumb_md5,
"videoFileId": video_file_id,
"videoThumbFileId": video_thumb_file_id,
"videoUrl": video_url,
"videoThumbUrl": video_thumb_url,
"voiceLength": voice_length,
"voipType": voip_type,
"quoteUsername": str(quote_username).strip(),
"quoteServerId": str(quote_server_id).strip(),
"quoteType": str(quote_type).strip(),
"quoteVoiceLength": str(quote_voice_length).strip(),
"quoteTitle": quote_title,
"quoteContent": quote_content,
"amount": amount,
"coverUrl": cover_url,
"fileSize": file_size,
"fileMd5": file_md5,
"paySubType": pay_sub_type,
"transferStatus": transfer_status,
"transferId": transfer_id,
"_rawText": raw_text if local_type == 266287972401 else "",
}
)
def _postprocess_full_messages(
*,
merged: list[dict[str, Any]],
sender_usernames: list[str],
quote_usernames: list[str],
pat_usernames: set[str],
account_dir: Path,
username: str,
base_url: str,
contact_db_path: Path,
head_image_db_path: Path,
) -> None:
# 后处理:关联转账消息的最终状态
# 策略:优先使用 transferId 精确匹配,回退到金额+时间窗口匹配
# paysubtype 含义1=不明确 3=已收款 4=对方退回给你 8=发起转账 9=被对方退回 10=已过期
# 收集已退还和已收款的转账ID和金额
returned_transfer_ids: set[str] = set() # 退还状态的 transferId
received_transfer_ids: set[str] = set() # 已收款状态的 transferId
returned_amounts_with_time: list[tuple[str, int]] = [] # (金额, 时间戳) 用于退还回退匹配
received_amounts_with_time: list[tuple[str, int]] = [] # (金额, 时间戳) 用于收款回退匹配
for m in merged:
if m.get("renderType") == "transfer":
pst = str(m.get("paySubType") or "")
tid = str(m.get("transferId") or "").strip()
amt = str(m.get("amount") or "")
ts = int(m.get("createTime") or 0)
if pst in ("4", "9"): # 退还状态
if tid:
returned_transfer_ids.add(tid)
if amt:
returned_amounts_with_time.append((amt, ts))
elif pst == "3": # 已收款状态
if tid:
received_transfer_ids.add(tid)
if amt:
received_amounts_with_time.append((amt, ts))
# 更新原始转账消息的状态
for m in merged:
if m.get("renderType") == "transfer":
pst = str(m.get("paySubType") or "")
# 只更新未确定状态的原始转账消息paysubtype=1 或 8
if pst in ("1", "8"):
tid = str(m.get("transferId") or "").strip()
amt = str(m.get("amount") or "")
ts = int(m.get("createTime") or 0)
# 优先检查退还状态(退还优先于收款)
should_mark_returned = False
should_mark_received = False
# 策略1精确 transferId 匹配
if tid:
if tid in returned_transfer_ids:
should_mark_returned = True
elif tid in received_transfer_ids:
should_mark_received = True
# 策略2回退到金额+时间窗口匹配24小时内同金额
if not should_mark_returned and not should_mark_received and amt:
for ret_amt, ret_ts in returned_amounts_with_time:
if ret_amt == amt and abs(ret_ts - ts) <= 86400:
should_mark_returned = True
break
if not should_mark_returned:
for rec_amt, rec_ts in received_amounts_with_time:
if rec_amt == amt and abs(rec_ts - ts) <= 86400:
should_mark_received = True
break
if should_mark_returned:
m["paySubType"] = "9"
m["transferStatus"] = "已被退还"
elif should_mark_received:
m["paySubType"] = "3"
# 根据 isSent 判断:发起方显示"已收款",收款方显示"已被接收"
is_sent = m.get("isSent", False)
m["transferStatus"] = "已收款" if is_sent else "已被接收"
uniq_senders = list(
dict.fromkeys([u for u in (sender_usernames + list(pat_usernames) + quote_usernames) if u])
)
sender_contact_rows = _load_contact_rows(contact_db_path, uniq_senders)
local_sender_avatars = _query_head_image_usernames(head_image_db_path, uniq_senders)
for m in merged:
su = str(m.get("senderUsername") or "")
if not su:
continue
row = sender_contact_rows.get(su)
m["senderDisplayName"] = _pick_display_name(row, su)
avatar_url = _pick_avatar_url(row)
if not avatar_url and su in local_sender_avatars:
avatar_url = base_url + _build_avatar_url(account_dir.name, su)
m["senderAvatar"] = avatar_url
qu = str(m.get("quoteUsername") or "").strip()
if qu:
qrow = sender_contact_rows.get(qu)
qt = str(m.get("quoteTitle") or "").strip()
if qrow is not None:
remark = ""
try:
remark = str(qrow["remark"] or "").strip()
except Exception:
remark = ""
if remark:
m["quoteTitle"] = remark
elif not qt:
m["quoteTitle"] = _pick_display_name(qrow, qu)
elif not qt:
m["quoteTitle"] = qu
# Media URL fallback: if CDN URLs missing, use local media endpoints.
try:
rt = str(m.get("renderType") or "")
if rt == "image":
if not str(m.get("imageUrl") or ""):
md5 = str(m.get("imageMd5") or "").strip()
file_id = str(m.get("imageFileId") or "").strip()
if md5:
m["imageUrl"] = (
base_url
+ f"/api/chat/media/image?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif file_id:
m["imageUrl"] = (
base_url
+ f"/api/chat/media/image?account={quote(account_dir.name)}&file_id={quote(file_id)}&username={quote(username)}"
)
elif rt == "emoji":
md5 = str(m.get("emojiMd5") or "")
if md5:
existing_local: Optional[Path] = None
try:
existing_local = _try_find_decrypted_resource(account_dir, str(md5).lower())
except Exception:
existing_local = None
if existing_local:
try:
cur = str(m.get("emojiUrl") or "")
if cur and re.match(r"^https?://", cur, flags=re.I) and (
"/api/chat/media/emoji" not in cur
):
m["emojiRemoteUrl"] = cur
except Exception:
pass
m["emojiUrl"] = (
base_url
+ f"/api/chat/media/emoji?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif (not str(m.get("emojiUrl") or "")):
m["emojiUrl"] = (
base_url
+ f"/api/chat/media/emoji?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif rt == "video":
video_thumb_url = str(m.get("videoThumbUrl") or "").strip()
video_thumb_md5 = str(m.get("videoThumbMd5") or "").strip()
video_thumb_file_id = str(m.get("videoThumbFileId") or "").strip()
if (not video_thumb_url) or (
not video_thumb_url.lower().startswith(("http://", "https://"))
):
if video_thumb_md5:
m["videoThumbUrl"] = (
base_url
+ f"/api/chat/media/video_thumb?account={quote(account_dir.name)}&md5={quote(video_thumb_md5)}&username={quote(username)}"
)
elif video_thumb_file_id:
m["videoThumbUrl"] = (
base_url
+ f"/api/chat/media/video_thumb?account={quote(account_dir.name)}&file_id={quote(video_thumb_file_id)}&username={quote(username)}"
)
video_url = str(m.get("videoUrl") or "").strip()
video_md5 = str(m.get("videoMd5") or "").strip()
video_file_id = str(m.get("videoFileId") or "").strip()
if (not video_url) or (not video_url.lower().startswith(("http://", "https://"))):
if video_md5:
m["videoUrl"] = (
base_url
+ f"/api/chat/media/video?account={quote(account_dir.name)}&md5={quote(video_md5)}&username={quote(username)}"
)
elif video_file_id:
m["videoUrl"] = (
base_url
+ f"/api/chat/media/video?account={quote(account_dir.name)}&file_id={quote(video_file_id)}&username={quote(username)}"
)
elif rt == "voice":
if str(m.get("serverId") or ""):
sid = int(m.get("serverId") or 0)
if sid:
m["voiceUrl"] = base_url + f"/api/chat/media/voice?account={quote(account_dir.name)}&server_id={sid}"
except Exception:
pass
if int(m.get("type") or 0) == 266287972401:
raw = str(m.get("_rawText") or "")
if raw:
m["content"] = _parse_pat_message(raw, sender_contact_rows)
if "_rawText" in m:
m.pop("_rawText", None)
@router.get("/api/chat/accounts", summary="列出已解密账号")
async def list_chat_accounts():
"""列出 output/databases 下可用于聊天预览的账号目录"""
accounts = _list_decrypted_accounts()
if not accounts:
return {
"status": "error",
"accounts": [],
"default_account": None,
"message": "No decrypted databases found. Please decrypt first.",
}
return {
"status": "success",
"accounts": accounts,
"default_account": accounts[0],
}
@router.get("/api/chat/sessions", summary="获取会话列表(聊天左侧列表)")
async def list_chat_sessions(
request: Request,
account: Optional[str] = None,
limit: int = 400,
include_hidden: bool = False,
include_official: bool = False,
preview: str = "latest",
):
"""从 session.db + contact.db 读取会话列表,用于前端聊天界面动态渲染联系人"""
if limit <= 0:
raise HTTPException(status_code=400, detail="Invalid limit.")
if limit > 2000:
limit = 2000
account_dir = _resolve_account_dir(account)
session_db_path = account_dir / "session.db"
contact_db_path = account_dir / "contact.db"
head_image_db_path = account_dir / "head_image.db"
base_url = str(request.base_url).rstrip("/")
sconn = sqlite3.connect(str(session_db_path))
sconn.row_factory = sqlite3.Row
try:
try:
rows = sconn.execute(
"""
SELECT
username,
unread_count,
is_hidden,
summary,
draft,
last_timestamp,
sort_timestamp,
last_msg_locald_id,
last_msg_type,
last_msg_sub_type,
last_msg_sender,
last_sender_display_name
FROM SessionTable
ORDER BY sort_timestamp DESC
LIMIT ?
""",
(int(limit),),
).fetchall()
except sqlite3.OperationalError:
rows = sconn.execute(
"""
SELECT
username,
unread_count,
is_hidden,
summary,
draft,
last_timestamp,
sort_timestamp,
last_msg_type,
last_msg_sub_type
FROM SessionTable
ORDER BY sort_timestamp DESC
LIMIT ?
""",
(int(limit),),
).fetchall()
finally:
sconn.close()
filtered: list[sqlite3.Row] = []
usernames: list[str] = []
for r in rows:
username = r["username"] or ""
if not username:
continue
if not include_hidden and int(r["is_hidden"] or 0) == 1:
continue
if not _should_keep_session(username, include_official=include_official):
continue
filtered.append(r)
usernames.append(username)
contact_rows = _load_contact_rows(contact_db_path, usernames)
local_avatar_usernames = _query_head_image_usernames(head_image_db_path, usernames)
preview_mode = str(preview or "").strip().lower()
if preview_mode not in {"latest", "index", "session", "db", "none"}:
preview_mode = "latest"
if preview_mode == "index":
preview_mode = "latest"
last_previews: dict[str, str] = {}
if preview_mode == "latest":
try:
last_previews = load_session_last_messages(account_dir, usernames)
# Backward-compatible: old decrypted accounts may not have built the cache table yet.
if (not last_previews) and usernames:
build_session_last_message_table(
account_dir,
rebuild=False,
include_hidden=True,
include_official=True,
)
last_previews = load_session_last_messages(account_dir, usernames)
except Exception:
last_previews = {}
if preview_mode in {"latest", "db"}:
targets = usernames if preview_mode == "db" else [u for u in usernames if u and (u not in last_previews)]
if targets:
legacy = _load_latest_message_previews(account_dir, targets)
for u, v in legacy.items():
if v:
last_previews[u] = v
sessions: list[dict[str, Any]] = []
for r in filtered:
username = r["username"]
c_row = contact_rows.get(username)
display_name = _pick_display_name(c_row, username)
avatar_url = _pick_avatar_url(c_row)
if not avatar_url and username in local_avatar_usernames:
avatar_url = base_url + _build_avatar_url(account_dir.name, username)
last_message = ""
if preview_mode == "session":
draft_text = _decode_sqlite_text(r["draft"]).strip()
if draft_text:
draft_text = re.sub(r"\s+", " ", draft_text).strip()
last_message = f"[草稿] {draft_text}" if draft_text else "[草稿]"
else:
summary_text = _decode_sqlite_text(r["summary"]).strip()
summary_text = re.sub(r"\s+", " ", summary_text).strip()
if summary_text:
last_message = summary_text
else:
last_message = _infer_last_message_brief(r["last_msg_type"], r["last_msg_sub_type"])
elif preview_mode in {"latest", "db"}:
if str(last_previews.get(username) or "").strip():
last_message = str(last_previews.get(username) or "").strip()
elif preview_mode != "none":
summary_text = _decode_sqlite_text(r["summary"]).strip()
summary_text = re.sub(r"\s+", " ", summary_text).strip()
if summary_text:
last_message = summary_text
else:
last_message = _infer_last_message_brief(r["last_msg_type"], r["last_msg_sub_type"])
elif preview_mode != "none":
summary_text = _decode_sqlite_text(r["summary"]).strip()
summary_text = re.sub(r"\s+", " ", summary_text).strip()
if summary_text:
last_message = summary_text
else:
last_message = _infer_last_message_brief(r["last_msg_type"], r["last_msg_sub_type"])
last_time = _format_session_time(r["sort_timestamp"] or r["last_timestamp"])
sessions.append(
{
"id": username,
"username": username,
"name": display_name,
"avatar": avatar_url,
"lastMessage": last_message,
"lastMessageTime": last_time,
"unreadCount": int(r["unread_count"] or 0),
"isGroup": bool(username.endswith("@chatroom")),
}
)
return {
"status": "success",
"account": account_dir.name,
"total": len(sessions),
"sessions": sessions,
}
def _collect_chat_messages(
*,
username: str,
account_dir: Path,
db_paths: list[Path],
resource_conn: Optional[sqlite3.Connection],
resource_chat_id: Optional[int],
take: int,
want_types: Optional[set[str]],
) -> tuple[list[dict[str, Any]], bool, list[str], list[str], set[str]]:
is_group = bool(username.endswith("@chatroom"))
take = int(take)
if take < 0:
take = 0
take_probe = take + 1
merged: list[dict[str, Any]] = []
sender_usernames: list[str] = []
quote_usernames: list[str] = []
pat_usernames: set[str] = set()
has_more_any = False
for db_path in db_paths:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
try:
table_name = _resolve_msg_table_name(conn, username)
if not table_name:
continue
my_wxid = account_dir.name
my_rowid = None
try:
r = conn.execute(
"SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1",
(my_wxid,),
).fetchone()
if r is not None:
my_rowid = int(r[0])
except Exception:
my_rowid = None
quoted_table = _quote_ident(table_name)
sql_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
sql_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
# Force sqlite3 to return TEXT as raw bytes for this query, so we can zstd-decompress
# compress_content reliably.
conn.text_factory = bytes
try:
rows = conn.execute(sql_with_join, (take_probe,)).fetchall()
except Exception:
rows = conn.execute(sql_no_join, (take_probe,)).fetchall()
if len(rows) > take:
has_more_any = True
rows = rows[:take]
for r in rows:
local_id = int(r["local_id"] or 0)
create_time = int(r["create_time"] or 0)
sort_seq = int(r["sort_seq"] or 0) if r["sort_seq"] is not None else 0
local_type = int(r["local_type"] or 0)
sender_username = _decode_sqlite_text(r["sender_username"]).strip()
is_sent = False
if my_rowid is not None:
try:
is_sent = int(r["real_sender_id"] or 0) == int(my_rowid)
except Exception:
is_sent = False
raw_text = _decode_message_content(r["compress_content"], r["message_content"])
raw_text = raw_text.strip()
sender_prefix = ""
if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and sender_prefix:
sender_username = sender_prefix
if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')):
xml_sender = _extract_sender_from_group_xml(raw_text)
if xml_sender:
sender_username = xml_sender
if is_sent:
sender_username = account_dir.name
elif (not is_group) and (not sender_username):
sender_username = username
render_type = "text"
content_text = raw_text
title = ""
url = ""
image_md5 = ""
emoji_md5 = ""
emoji_url = ""
thumb_url = ""
image_url = ""
image_file_id = ""
video_md5 = ""
video_thumb_md5 = ""
video_file_id = ""
video_thumb_file_id = ""
video_url = ""
video_thumb_url = ""
voice_length = ""
quote_username = ""
quote_title = ""
quote_content = ""
quote_server_id = ""
quote_type = ""
quote_voice_length = ""
amount = ""
cover_url = ""
file_size = ""
pay_sub_type = ""
transfer_status = ""
file_md5 = ""
transfer_id = ""
voip_type = ""
if local_type == 10000:
render_type = "system"
if "revokemsg" in raw_text:
content_text = "撤回了一条消息"
else:
import re
content_text = re.sub(r"</?[_a-zA-Z0-9]+[^>]*>", "", raw_text)
content_text = re.sub(r"\s+", " ", content_text).strip() or "[系统消息]"
elif local_type == 49:
parsed = _parse_app_message(raw_text)
render_type = str(parsed.get("renderType") or "text")
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
amount = str(parsed.get("amount") or "")
cover_url = str(parsed.get("coverUrl") or "")
thumb_url = str(parsed.get("thumbUrl") or "")
file_size = str(parsed.get("size") or "")
pay_sub_type = str(parsed.get("paySubType") or "")
file_md5 = str(parsed.get("fileMd5") or "")
transfer_id = str(parsed.get("transferId") or "")
if render_type == "transfer":
# 直接从原始 XML 提取 transferid可能在 wcpayinfo 内)
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(raw_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
elif local_type == 266287972401:
render_type = "system"
template = _extract_xml_tag_text(raw_text, "template")
if template:
import re
pat_usernames.update({m.group(1) for m in re.finditer(r"\$\{([^}]+)\}", template) if m.group(1)})
content_text = "[拍一拍]"
else:
content_text = "[拍一拍]"
elif local_type == 244813135921:
render_type = "quote"
parsed = _parse_app_message(raw_text)
content_text = str(parsed.get("content") or "[引用消息]")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
elif local_type == 3:
render_type = "image"
# 先尝试从 XML 中提取 md5不同版本字段可能不同
image_md5 = _extract_xml_attr(raw_text, "md5") or _extract_xml_tag_text(raw_text, "md5")
if not image_md5:
for k in [
"cdnthumbmd5",
"cdnthumd5",
"cdnmidimgmd5",
"cdnbigimgmd5",
"hdmd5",
"hevc_mid_md5",
"hevc_md5",
"imgmd5",
"filemd5",
]:
image_md5 = _extract_xml_attr(raw_text, k) or _extract_xml_tag_text(raw_text, k)
if image_md5:
break
# Prefer message_resource.db md5 for local files: XML md5 frequently differs from the on-disk *.dat basename
# (especially for *_t.dat thumbnails), causing the media endpoint to 404.
if resource_conn is not None:
try:
resource_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
except Exception:
resource_md5 = ""
resource_md5 = str(resource_md5 or "").strip().lower()
if len(resource_md5) == 32 and all(c in "0123456789abcdef" for c in resource_md5):
image_md5 = resource_md5
# Extract CDN URL (some versions store a non-HTTP "file id" string here)
_cdn_url_or_id = (
_extract_xml_attr(raw_text, "cdnthumburl")
or _extract_xml_attr(raw_text, "cdnthumurl")
or _extract_xml_attr(raw_text, "cdnmidimgurl")
or _extract_xml_attr(raw_text, "cdnbigimgurl")
or _extract_xml_tag_text(raw_text, "cdnthumburl")
or _extract_xml_tag_text(raw_text, "cdnthumurl")
or _extract_xml_tag_text(raw_text, "cdnmidimgurl")
or _extract_xml_tag_text(raw_text, "cdnbigimgurl")
)
_cdn_url_or_id = str(_cdn_url_or_id or "").strip()
image_url = _cdn_url_or_id if _cdn_url_or_id.startswith(("http://", "https://")) else ""
if (not image_url) and _cdn_url_or_id:
image_file_id = _cdn_url_or_id
content_text = "[图片]"
elif local_type == 34:
render_type = "voice"
duration = _extract_xml_attr(raw_text, "voicelength")
voice_length = duration
content_text = f"[语音 {duration}秒]" if duration else "[语音]"
elif local_type == 43 or local_type == 62:
render_type = "video"
video_md5 = _extract_xml_attr(raw_text, "md5")
video_thumb_md5 = _extract_xml_attr(raw_text, "cdnthumbmd5")
video_thumb_url_or_id = _extract_xml_attr(raw_text, "cdnthumburl") or _extract_xml_tag_text(
raw_text, "cdnthumburl"
)
video_url_or_id = _extract_xml_attr(raw_text, "cdnvideourl") or _extract_xml_tag_text(
raw_text, "cdnvideourl"
)
video_thumb_url = (
video_thumb_url_or_id
if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_url = (
video_url_or_id
if str(video_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_thumb_file_id = "" if video_thumb_url else (str(video_thumb_url_or_id or "").strip() or "")
video_file_id = "" if video_url else (str(video_url_or_id or "").strip() or "")
if (not video_thumb_md5) and resource_conn is not None:
video_thumb_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[视频]"
elif local_type == 47:
render_type = "emoji"
emoji_md5 = _extract_xml_attr(raw_text, "md5")
if not emoji_md5:
emoji_md5 = _extract_xml_tag_text(raw_text, "md5")
emoji_url = _extract_xml_attr(raw_text, "cdnurl")
if not emoji_url:
emoji_url = _extract_xml_tag_text(raw_text, "cdn_url")
if (not emoji_md5) and resource_conn is not None:
emoji_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[表情]"
elif local_type == 50:
render_type = "voip"
try:
import re
block = raw_text
m_voip = re.search(
r"(<VoIPBubbleMsg[^>]*>.*?</VoIPBubbleMsg>)",
raw_text,
flags=re.IGNORECASE | re.DOTALL,
)
if m_voip:
block = m_voip.group(1) or raw_text
room_type = str(_extract_xml_tag_text(block, "room_type") or "").strip()
if room_type == "0":
voip_type = "video"
elif room_type == "1":
voip_type = "audio"
voip_msg = str(_extract_xml_tag_text(block, "msg") or "").strip()
content_text = voip_msg or "通话"
except Exception:
content_text = "通话"
elif local_type != 1:
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
else:
if content_text.startswith("<") or content_text.startswith('"<'):
if "<appmsg" in content_text.lower():
parsed = _parse_app_message(content_text)
rt = str(parsed.get("renderType") or "")
if rt and rt != "text":
render_type = rt
content_text = str(parsed.get("content") or content_text)
title = str(parsed.get("title") or title)
url = str(parsed.get("url") or url)
quote_title = str(parsed.get("quoteTitle") or quote_title)
quote_content = str(parsed.get("quoteContent") or quote_content)
amount = str(parsed.get("amount") or amount)
cover_url = str(parsed.get("coverUrl") or cover_url)
thumb_url = str(parsed.get("thumbUrl") or thumb_url)
file_size = str(parsed.get("size") or file_size)
pay_sub_type = str(parsed.get("paySubType") or pay_sub_type)
file_md5 = str(parsed.get("fileMd5") or file_md5)
transfer_id = str(parsed.get("transferId") or transfer_id)
if render_type == "transfer":
# 如果 transferId 仍为空,尝试从原始 XML 提取
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(content_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
t = _extract_xml_tag_text(content_text, "title")
d = _extract_xml_tag_text(content_text, "des")
content_text = t or d or _infer_message_brief_by_local_type(local_type)
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
if want_types is not None:
rt_key = _normalize_render_type_key(render_type)
if rt_key not in want_types:
continue
if sender_username:
sender_usernames.append(sender_username)
if quote_username:
quote_usernames.append(str(quote_username).strip())
merged.append(
{
"id": f"{db_path.stem}:{table_name}:{local_id}",
"localId": local_id,
"serverId": int(r["server_id"] or 0),
"serverIdStr": str(int(r["server_id"] or 0)) if int(r["server_id"] or 0) else "",
"type": local_type,
"createTime": create_time,
"sortSeq": sort_seq,
"senderUsername": sender_username,
"isSent": bool(is_sent),
"renderType": render_type,
"content": content_text,
"title": title,
"url": url,
"imageMd5": image_md5,
"imageFileId": image_file_id,
"emojiMd5": emoji_md5,
"emojiUrl": emoji_url,
"thumbUrl": thumb_url,
"imageUrl": image_url,
"videoMd5": video_md5,
"videoThumbMd5": video_thumb_md5,
"videoFileId": video_file_id,
"videoThumbFileId": video_thumb_file_id,
"videoUrl": video_url,
"videoThumbUrl": video_thumb_url,
"voiceLength": voice_length,
"voipType": voip_type,
"quoteUsername": str(quote_username).strip(),
"quoteServerId": str(quote_server_id).strip(),
"quoteType": str(quote_type).strip(),
"quoteVoiceLength": str(quote_voice_length).strip(),
"quoteTitle": quote_title,
"quoteContent": quote_content,
"amount": amount,
"coverUrl": cover_url,
"fileSize": file_size,
"fileMd5": file_md5,
"paySubType": pay_sub_type,
"transferStatus": transfer_status,
"transferId": transfer_id,
"_rawText": raw_text if local_type == 266287972401 else "",
}
)
finally:
conn.close()
return merged, has_more_any, sender_usernames, quote_usernames, pat_usernames
@router.get("/api/chat/messages", summary="获取会话消息列表")
async def list_chat_messages(
request: Request,
username: str,
account: Optional[str] = None,
limit: int = 50,
offset: int = 0,
order: str = "asc",
render_types: Optional[str] = None,
):
if not username:
raise HTTPException(status_code=400, detail="Missing username.")
if limit <= 0:
raise HTTPException(status_code=400, detail="Invalid limit.")
if limit > 500:
limit = 500
if offset < 0:
offset = 0
account_dir = _resolve_account_dir(account)
db_paths = _iter_message_db_paths(account_dir)
contact_db_path = account_dir / "contact.db"
head_image_db_path = account_dir / "head_image.db"
message_resource_db_path = account_dir / "message_resource.db"
base_url = str(request.base_url).rstrip("/")
if not db_paths:
return {
"status": "error",
"account": account_dir.name,
"username": username,
"total": 0,
"messages": [],
"message": "No message databases found for this account.",
}
resource_conn: Optional[sqlite3.Connection] = None
resource_chat_id: Optional[int] = None
try:
if message_resource_db_path.exists():
resource_conn = sqlite3.connect(str(message_resource_db_path))
resource_conn.row_factory = sqlite3.Row
resource_chat_id = _resource_lookup_chat_id(resource_conn, username)
except Exception:
if resource_conn is not None:
try:
resource_conn.close()
except Exception:
pass
resource_conn = None
resource_chat_id = None
want_asc = str(order or "").lower() != "desc"
want_types: Optional[set[str]] = None
if render_types is not None:
parts = [p.strip() for p in str(render_types or "").split(",") if p.strip()]
want = {_normalize_render_type_key(p) for p in parts}
want.discard("")
if want and not ({"all", "any", "none"} & want):
want_types = want
scan_take = int(limit) + int(offset)
if scan_take < 0:
scan_take = 0
merged: list[dict[str, Any]] = []
sender_usernames: list[str] = []
quote_usernames: list[str] = []
pat_usernames: set[str] = set()
has_more_any = False
while True:
(
merged,
has_more_any,
sender_usernames,
quote_usernames,
pat_usernames,
) = _collect_chat_messages(
username=username,
account_dir=account_dir,
db_paths=db_paths,
resource_conn=resource_conn,
resource_chat_id=resource_chat_id,
take=scan_take,
want_types=want_types,
)
if want_types is None:
break
if (len(merged) >= (int(offset) + int(limit))) or (not has_more_any):
break
next_take = scan_take * 2 if scan_take > 0 else (int(limit) + int(offset))
if next_take <= scan_take:
break
scan_take = next_take
r"""
take = int(limit) + int(offset)
take_probe = take + 1
merged: list[dict[str, Any]] = []
sender_usernames: list[str] = []
quote_usernames: list[str] = []
pat_usernames: set[str] = set()
is_group = bool(username.endswith("@chatroom"))
has_more_any = False
for db_path in db_paths:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
try:
table_name = _resolve_msg_table_name(conn, username)
if not table_name:
continue
my_wxid = account_dir.name
my_rowid = None
try:
r = conn.execute(
"SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1",
(my_wxid,),
).fetchone()
if r is not None:
my_rowid = int(r[0])
except Exception:
my_rowid = None
quoted_table = _quote_ident(table_name)
sql_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
sql_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
# Force sqlite3 to return TEXT as raw bytes for this query, so we can zstd-decompress
# compress_content reliably.
conn.text_factory = bytes
try:
rows = conn.execute(sql_with_join, (take_probe,)).fetchall()
except Exception:
rows = conn.execute(sql_no_join, (take_probe,)).fetchall()
if len(rows) > take:
has_more_any = True
rows = rows[:take]
for r in rows:
local_id = int(r["local_id"] or 0)
create_time = int(r["create_time"] or 0)
sort_seq = int(r["sort_seq"] or 0) if r["sort_seq"] is not None else 0
local_type = int(r["local_type"] or 0)
sender_username = _decode_sqlite_text(r["sender_username"]).strip()
is_sent = False
if my_rowid is not None:
try:
is_sent = int(r["real_sender_id"] or 0) == int(my_rowid)
except Exception:
is_sent = False
raw_text = _decode_message_content(r["compress_content"], r["message_content"])
raw_text = raw_text.strip()
sender_prefix = ""
if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and sender_prefix:
sender_username = sender_prefix
if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')):
xml_sender = _extract_sender_from_group_xml(raw_text)
if xml_sender:
sender_username = xml_sender
if is_sent:
sender_username = account_dir.name
elif (not is_group) and (not sender_username):
sender_username = username
if sender_username:
sender_usernames.append(sender_username)
render_type = "text"
content_text = raw_text
title = ""
url = ""
image_md5 = ""
emoji_md5 = ""
emoji_url = ""
thumb_url = ""
image_url = ""
image_file_id = ""
video_md5 = ""
video_thumb_md5 = ""
video_file_id = ""
video_thumb_file_id = ""
video_url = ""
video_thumb_url = ""
voice_length = ""
quote_username = ""
quote_title = ""
quote_content = ""
quote_server_id = ""
quote_type = ""
quote_voice_length = ""
amount = ""
cover_url = ""
file_size = ""
pay_sub_type = ""
transfer_status = ""
file_md5 = ""
transfer_id = ""
voip_type = ""
if local_type == 10000:
render_type = "system"
if "revokemsg" in raw_text:
content_text = "撤回了一条消息"
else:
import re
content_text = re.sub(r"</?[_a-zA-Z0-9]+[^>]*>", "", raw_text)
content_text = re.sub(r"\s+", " ", content_text).strip() or "[系统消息]"
elif local_type == 49:
parsed = _parse_app_message(raw_text)
render_type = str(parsed.get("renderType") or "text")
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
amount = str(parsed.get("amount") or "")
cover_url = str(parsed.get("coverUrl") or "")
thumb_url = str(parsed.get("thumbUrl") or "")
file_size = str(parsed.get("size") or "")
pay_sub_type = str(parsed.get("paySubType") or "")
file_md5 = str(parsed.get("fileMd5") or "")
transfer_id = str(parsed.get("transferId") or "")
if render_type == "transfer":
# 直接从原始 XML 提取 transferid可能在 wcpayinfo 内)
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(raw_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
elif local_type == 266287972401:
render_type = "system"
template = _extract_xml_tag_text(raw_text, "template")
if template:
import re
pat_usernames.update({m.group(1) for m in re.finditer(r"\$\{([^}]+)\}", template) if m.group(1)})
content_text = "[拍一拍]"
else:
content_text = "[拍一拍]"
elif local_type == 244813135921:
render_type = "quote"
parsed = _parse_app_message(raw_text)
content_text = str(parsed.get("content") or "[引用消息]")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
quote_username = str(parsed.get("quoteUsername") or "")
quote_server_id = str(parsed.get("quoteServerId") or "")
quote_type = str(parsed.get("quoteType") or "")
quote_voice_length = str(parsed.get("quoteVoiceLength") or "")
elif local_type == 3:
render_type = "image"
# 先尝试从 XML 中提取 md5不同版本字段可能不同
image_md5 = _extract_xml_attr(raw_text, "md5") or _extract_xml_tag_text(raw_text, "md5")
if not image_md5:
for k in [
"cdnthumbmd5",
"cdnthumd5",
"cdnmidimgmd5",
"cdnbigimgmd5",
"hdmd5",
"hevc_mid_md5",
"hevc_md5",
"imgmd5",
"filemd5",
]:
image_md5 = _extract_xml_attr(raw_text, k) or _extract_xml_tag_text(raw_text, k)
if image_md5:
break
# Prefer message_resource.db md5 for local files: XML md5 frequently differs from the on-disk *.dat basename
# (especially for *_t.dat thumbnails), causing the media endpoint to 404.
if resource_conn is not None:
try:
resource_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
except Exception:
resource_md5 = ""
resource_md5 = str(resource_md5 or "").strip().lower()
if len(resource_md5) == 32 and all(c in "0123456789abcdef" for c in resource_md5):
image_md5 = resource_md5
# Extract CDN URL (some versions store a non-HTTP "file id" string here)
_cdn_url_or_id = (
_extract_xml_attr(raw_text, "cdnthumburl")
or _extract_xml_attr(raw_text, "cdnthumurl")
or _extract_xml_attr(raw_text, "cdnmidimgurl")
or _extract_xml_attr(raw_text, "cdnbigimgurl")
or _extract_xml_tag_text(raw_text, "cdnthumburl")
or _extract_xml_tag_text(raw_text, "cdnthumurl")
or _extract_xml_tag_text(raw_text, "cdnmidimgurl")
or _extract_xml_tag_text(raw_text, "cdnbigimgurl")
)
_cdn_url_or_id = str(_cdn_url_or_id or "").strip()
image_url = _cdn_url_or_id if _cdn_url_or_id.startswith(("http://", "https://")) else ""
if (not image_url) and _cdn_url_or_id:
image_file_id = _cdn_url_or_id
content_text = "[图片]"
elif local_type == 34:
render_type = "voice"
duration = _extract_xml_attr(raw_text, "voicelength")
voice_length = duration
content_text = f"[语音 {duration}秒]" if duration else "[语音]"
elif local_type == 43 or local_type == 62:
render_type = "video"
video_md5 = _extract_xml_attr(raw_text, "md5")
video_thumb_md5 = _extract_xml_attr(raw_text, "cdnthumbmd5")
video_thumb_url_or_id = _extract_xml_attr(raw_text, "cdnthumburl") or _extract_xml_tag_text(
raw_text, "cdnthumburl"
)
video_url_or_id = _extract_xml_attr(raw_text, "cdnvideourl") or _extract_xml_tag_text(
raw_text, "cdnvideourl"
)
video_thumb_url = (
video_thumb_url_or_id
if str(video_thumb_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_url = (
video_url_or_id
if str(video_url_or_id or "").strip().lower().startswith(("http://", "https://"))
else ""
)
video_thumb_file_id = "" if video_thumb_url else (str(video_thumb_url_or_id or "").strip() or "")
video_file_id = "" if video_url else (str(video_url_or_id or "").strip() or "")
if (not video_thumb_md5) and resource_conn is not None:
video_thumb_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[视频]"
elif local_type == 47:
render_type = "emoji"
emoji_md5 = _extract_xml_attr(raw_text, "md5")
if not emoji_md5:
emoji_md5 = _extract_xml_tag_text(raw_text, "md5")
emoji_url = _extract_xml_attr(raw_text, "cdnurl")
if not emoji_url:
emoji_url = _extract_xml_tag_text(raw_text, "cdn_url")
if (not emoji_md5) and resource_conn is not None:
emoji_md5 = _lookup_resource_md5(
resource_conn,
resource_chat_id,
message_local_type=local_type,
server_id=int(r["server_id"] or 0),
local_id=local_id,
create_time=create_time,
)
content_text = "[表情]"
elif local_type == 50:
render_type = "voip"
try:
import re
block = raw_text
m_voip = re.search(
r"(<VoIPBubbleMsg[^>]*>.*?</VoIPBubbleMsg>)",
raw_text,
flags=re.IGNORECASE | re.DOTALL,
)
if m_voip:
block = m_voip.group(1) or raw_text
room_type = str(_extract_xml_tag_text(block, "room_type") or "").strip()
if room_type == "0":
voip_type = "video"
elif room_type == "1":
voip_type = "audio"
voip_msg = str(_extract_xml_tag_text(block, "msg") or "").strip()
content_text = voip_msg or "通话"
except Exception:
content_text = "通话"
elif local_type != 1:
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
else:
if content_text.startswith("<") or content_text.startswith('"<'):
if "<appmsg" in content_text.lower():
parsed = _parse_app_message(content_text)
rt = str(parsed.get("renderType") or "")
if rt and rt != "text":
render_type = rt
content_text = str(parsed.get("content") or content_text)
title = str(parsed.get("title") or title)
url = str(parsed.get("url") or url)
quote_title = str(parsed.get("quoteTitle") or quote_title)
quote_content = str(parsed.get("quoteContent") or quote_content)
amount = str(parsed.get("amount") or amount)
cover_url = str(parsed.get("coverUrl") or cover_url)
thumb_url = str(parsed.get("thumbUrl") or thumb_url)
file_size = str(parsed.get("size") or file_size)
pay_sub_type = str(parsed.get("paySubType") or pay_sub_type)
file_md5 = str(parsed.get("fileMd5") or file_md5)
transfer_id = str(parsed.get("transferId") or transfer_id)
if render_type == "transfer":
# 如果 transferId 仍为空,尝试从原始 XML 提取
if not transfer_id:
transfer_id = _extract_xml_tag_or_attr(content_text, "transferid") or ""
transfer_status = _infer_transfer_status_text(
is_sent=is_sent,
paysubtype=pay_sub_type,
receivestatus=str(parsed.get("receiveStatus") or ""),
sendertitle=str(parsed.get("senderTitle") or ""),
receivertitle=str(parsed.get("receiverTitle") or ""),
senderdes=str(parsed.get("senderDes") or ""),
receiverdes=str(parsed.get("receiverDes") or ""),
)
if not content_text:
content_text = transfer_status or "转账"
t = _extract_xml_tag_text(content_text, "title")
d = _extract_xml_tag_text(content_text, "des")
content_text = t or d or _infer_message_brief_by_local_type(local_type)
if not content_text:
content_text = _infer_message_brief_by_local_type(local_type)
if quote_username:
quote_usernames.append(str(quote_username).strip())
merged.append(
{
"id": f"{db_path.stem}:{table_name}:{local_id}",
"localId": local_id,
"serverId": int(r["server_id"] or 0),
"serverIdStr": str(int(r["server_id"] or 0)) if int(r["server_id"] or 0) else "",
"type": local_type,
"createTime": create_time,
"sortSeq": sort_seq,
"senderUsername": sender_username,
"isSent": bool(is_sent),
"renderType": render_type,
"content": content_text,
"title": title,
"url": url,
"imageMd5": image_md5,
"imageFileId": image_file_id,
"emojiMd5": emoji_md5,
"emojiUrl": emoji_url,
"thumbUrl": thumb_url,
"imageUrl": image_url,
"videoMd5": video_md5,
"videoThumbMd5": video_thumb_md5,
"videoFileId": video_file_id,
"videoThumbFileId": video_thumb_file_id,
"videoUrl": video_url,
"videoThumbUrl": video_thumb_url,
"voiceLength": voice_length,
"voipType": voip_type,
"quoteUsername": str(quote_username).strip(),
"quoteServerId": str(quote_server_id).strip(),
"quoteType": str(quote_type).strip(),
"quoteVoiceLength": str(quote_voice_length).strip(),
"quoteTitle": quote_title,
"quoteContent": quote_content,
"amount": amount,
"coverUrl": cover_url,
"fileSize": file_size,
"fileMd5": file_md5,
"paySubType": pay_sub_type,
"transferStatus": transfer_status,
"transferId": transfer_id,
"_rawText": raw_text if local_type == 266287972401 else "",
}
)
finally:
conn.close()
"""
if resource_conn is not None:
try:
resource_conn.close()
except Exception:
pass
# 后处理:关联转账消息的最终状态
# 策略:优先使用 transferId 精确匹配,回退到金额+时间窗口匹配
# paysubtype 含义1=不明确 3=已收款 4=对方退回给你 8=发起转账 9=被对方退回 10=已过期
# 收集已退还和已收款的转账ID和金额
returned_transfer_ids: set[str] = set() # 退还状态的 transferId
received_transfer_ids: set[str] = set() # 已收款状态的 transferId
returned_amounts_with_time: list[tuple[str, int]] = [] # (金额, 时间戳) 用于退还回退匹配
received_amounts_with_time: list[tuple[str, int]] = [] # (金额, 时间戳) 用于收款回退匹配
for m in merged:
if m.get("renderType") == "transfer":
pst = str(m.get("paySubType") or "")
tid = str(m.get("transferId") or "").strip()
amt = str(m.get("amount") or "")
ts = int(m.get("createTime") or 0)
if pst in ("4", "9"): # 退还状态
if tid:
returned_transfer_ids.add(tid)
if amt:
returned_amounts_with_time.append((amt, ts))
elif pst == "3": # 已收款状态
if tid:
received_transfer_ids.add(tid)
if amt:
received_amounts_with_time.append((amt, ts))
# 更新原始转账消息的状态
for m in merged:
if m.get("renderType") == "transfer":
pst = str(m.get("paySubType") or "")
# 只更新未确定状态的原始转账消息paysubtype=1 或 8
if pst in ("1", "8"):
tid = str(m.get("transferId") or "").strip()
amt = str(m.get("amount") or "")
ts = int(m.get("createTime") or 0)
# 优先检查退还状态(退还优先于收款)
should_mark_returned = False
should_mark_received = False
# 策略1精确 transferId 匹配
if tid:
if tid in returned_transfer_ids:
should_mark_returned = True
elif tid in received_transfer_ids:
should_mark_received = True
# 策略2回退到金额+时间窗口匹配24小时内同金额
if not should_mark_returned and not should_mark_received and amt:
for ret_amt, ret_ts in returned_amounts_with_time:
if ret_amt == amt and abs(ret_ts - ts) <= 86400:
should_mark_returned = True
break
if not should_mark_returned:
for rec_amt, rec_ts in received_amounts_with_time:
if rec_amt == amt and abs(rec_ts - ts) <= 86400:
should_mark_received = True
break
if should_mark_returned:
m["paySubType"] = "9"
m["transferStatus"] = "已被退还"
elif should_mark_received:
m["paySubType"] = "3"
# 根据 isSent 判断:发起方显示"已收款",收款方显示"已被接收"
is_sent = m.get("isSent", False)
m["transferStatus"] = "已收款" if is_sent else "已被接收"
uniq_senders = list(
dict.fromkeys(
[u for u in (sender_usernames + list(pat_usernames) + quote_usernames) if u]
)
)
sender_contact_rows = _load_contact_rows(contact_db_path, uniq_senders)
local_sender_avatars = _query_head_image_usernames(head_image_db_path, uniq_senders)
for m in merged:
su = str(m.get("senderUsername") or "")
if not su:
continue
row = sender_contact_rows.get(su)
m["senderDisplayName"] = _pick_display_name(row, su)
avatar_url = _pick_avatar_url(row)
if not avatar_url and su in local_sender_avatars:
avatar_url = base_url + _build_avatar_url(account_dir.name, su)
m["senderAvatar"] = avatar_url
qu = str(m.get("quoteUsername") or "").strip()
if qu:
qrow = sender_contact_rows.get(qu)
qt = str(m.get("quoteTitle") or "").strip()
if qrow is not None:
remark = ""
try:
remark = str(qrow["remark"] or "").strip()
except Exception:
remark = ""
if remark:
m["quoteTitle"] = remark
elif not qt:
m["quoteTitle"] = _pick_display_name(qrow, qu)
elif not qt:
m["quoteTitle"] = qu
# Media URL fallback: if CDN URLs missing, use local media endpoints.
try:
rt = str(m.get("renderType") or "")
if rt == "image":
if not str(m.get("imageUrl") or ""):
md5 = str(m.get("imageMd5") or "").strip()
file_id = str(m.get("imageFileId") or "").strip()
if md5:
m["imageUrl"] = (
base_url
+ f"/api/chat/media/image?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif file_id:
m["imageUrl"] = (
base_url
+ f"/api/chat/media/image?account={quote(account_dir.name)}&file_id={quote(file_id)}&username={quote(username)}"
)
elif rt == "emoji":
md5 = str(m.get("emojiMd5") or "")
if md5:
existing_local: Optional[Path] = None
try:
existing_local = _try_find_decrypted_resource(account_dir, str(md5).lower())
except Exception:
existing_local = None
if existing_local:
try:
import re
cur = str(m.get("emojiUrl") or "")
if cur and re.match(r"^https?://", cur, flags=re.I) and ("/api/chat/media/emoji" not in cur):
m["emojiRemoteUrl"] = cur
except Exception:
pass
m["emojiUrl"] = (
base_url
+ f"/api/chat/media/emoji?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif (not str(m.get("emojiUrl") or "")):
m["emojiUrl"] = (
base_url
+ f"/api/chat/media/emoji?account={quote(account_dir.name)}&md5={quote(md5)}&username={quote(username)}"
)
elif rt == "video":
video_thumb_url = str(m.get("videoThumbUrl") or "").strip()
video_thumb_md5 = str(m.get("videoThumbMd5") or "").strip()
video_thumb_file_id = str(m.get("videoThumbFileId") or "").strip()
if (not video_thumb_url) or (
not video_thumb_url.lower().startswith(("http://", "https://"))
):
if video_thumb_md5:
m["videoThumbUrl"] = (
base_url
+ f"/api/chat/media/video_thumb?account={quote(account_dir.name)}&md5={quote(video_thumb_md5)}&username={quote(username)}"
)
elif video_thumb_file_id:
m["videoThumbUrl"] = (
base_url
+ f"/api/chat/media/video_thumb?account={quote(account_dir.name)}&file_id={quote(video_thumb_file_id)}&username={quote(username)}"
)
video_url = str(m.get("videoUrl") or "").strip()
video_md5 = str(m.get("videoMd5") or "").strip()
video_file_id = str(m.get("videoFileId") or "").strip()
if (not video_url) or (not video_url.lower().startswith(("http://", "https://"))):
if video_md5:
m["videoUrl"] = (
base_url
+ f"/api/chat/media/video?account={quote(account_dir.name)}&md5={quote(video_md5)}&username={quote(username)}"
)
elif video_file_id:
m["videoUrl"] = (
base_url
+ f"/api/chat/media/video?account={quote(account_dir.name)}&file_id={quote(video_file_id)}&username={quote(username)}"
)
elif rt == "voice":
if str(m.get("serverId") or ""):
sid = int(m.get("serverId") or 0)
if sid:
m["voiceUrl"] = base_url + f"/api/chat/media/voice?account={quote(account_dir.name)}&server_id={sid}"
except Exception:
pass
if int(m.get("type") or 0) == 266287972401:
raw = str(m.get("_rawText") or "")
if raw:
m["content"] = _parse_pat_message(raw, sender_contact_rows)
if "_rawText" in m:
m.pop("_rawText", None)
def sort_key(m: dict[str, Any]) -> tuple[int, int, int]:
sseq = int(m.get("sortSeq") or 0)
cts = int(m.get("createTime") or 0)
lid = int(m.get("localId") or 0)
return (cts, sseq, lid)
merged.sort(key=sort_key, reverse=True)
has_more_global = bool(has_more_any or (len(merged) > (int(offset) + int(limit))))
page = merged[int(offset) : int(offset) + int(limit)]
if want_asc:
page = list(reversed(page))
return {
"status": "success",
"account": account_dir.name,
"username": username,
"total": int(offset) + len(page) + (1 if has_more_global else 0),
"hasMore": bool(has_more_global),
"messages": page,
}
async def _search_chat_messages_via_fts(
request: Request,
*,
q: str,
account: Optional[str],
username: Optional[str],
sender: Optional[str],
session_type: Optional[str],
limit: int,
offset: int,
start_time: Optional[int],
end_time: Optional[int],
render_types: Optional[str],
include_hidden: bool,
include_official: bool,
) -> dict[str, Any]:
tokens = _make_search_tokens(q)
if not tokens:
raise HTTPException(status_code=400, detail="Missing q.")
if limit <= 0:
raise HTTPException(status_code=400, detail="Invalid limit.")
if limit > 200:
limit = 200
if offset < 0:
offset = 0
start_ts = int(start_time) if start_time is not None else None
end_ts = int(end_time) if end_time is not None else None
if start_ts is not None and start_ts < 0:
start_ts = 0
if end_ts is not None and end_ts < 0:
end_ts = 0
want_types: Optional[set[str]] = None
if render_types is not None:
parts = [p.strip() for p in str(render_types or "").split(",") if p.strip()]
want_types = {p for p in parts if p}
if not want_types:
want_types = None
username = str(username).strip() if username else None
if not username:
username = None
sender = str(sender).strip() if sender else None
if not sender:
sender = None
session_type_norm = _normalize_session_type(session_type)
account_dir = _resolve_account_dir(account)
contact_db_path = account_dir / "contact.db"
head_image_db_path = account_dir / "head_image.db"
base_url = str(request.base_url).rstrip("/")
index_status = get_chat_search_index_status(account_dir)
index = dict(index_status.get("index") or {})
build = dict(index.get("build") or {})
index_exists = bool(index.get("exists"))
index_ready = bool(index.get("ready"))
build_status = str(build.get("status") or "").strip()
if (not index_ready) and build_status not in {"building", "error"}:
start_chat_search_index_build(account_dir, rebuild=bool(index_exists))
index_status = get_chat_search_index_status(account_dir)
index = dict(index_status.get("index") or {})
build = dict(index.get("build") or {})
build_status = str(build.get("status") or "").strip()
index_exists = bool(index.get("exists"))
index_ready = bool(index.get("ready"))
if build_status == "error":
return {
"status": "index_error",
"account": account_dir.name,
"q": q,
"tokens": tokens,
"scope": "conversation" if username else "global",
"username": username,
"offset": int(offset),
"limit": int(limit),
"baseUrl": base_url,
"total": 0,
"hasMore": False,
"hits": [],
"index": index,
"message": str(build.get("error") or "Search index build failed."),
}
if not index_ready:
return {
"status": "index_building",
"account": account_dir.name,
"q": q,
"tokens": tokens,
"scope": "conversation" if username else "global",
"username": username,
"offset": int(offset),
"limit": int(limit),
"baseUrl": base_url,
"total": 0,
"hasMore": False,
"hits": [],
"index": index,
"message": "Search index is building. Please retry in a moment.",
}
fts_query = _build_fts_query(q)
if not fts_query:
raise HTTPException(status_code=400, detail="Missing q.")
index_db_path = get_chat_search_index_db_path(account_dir)
conn = sqlite3.connect(str(index_db_path))
conn.row_factory = sqlite3.Row
try:
try:
where_parts: list[str] = ["message_fts MATCH ?"]
params: list[Any] = [fts_query]
if username:
where_parts.append("username = ?")
params.append(str(username))
elif session_type_norm == "group":
where_parts.append("username LIKE ?")
params.append("%@chatroom")
elif session_type_norm == "single":
where_parts.append("username NOT LIKE ?")
params.append("%@chatroom")
if sender:
where_parts.append("sender_username = ?")
params.append(str(sender))
if want_types is not None:
types_sorted = sorted(want_types)
placeholders = ",".join(["?"] * len(types_sorted))
where_parts.append(f"render_type IN ({placeholders})")
params.extend(types_sorted)
if start_ts is not None:
where_parts.append("CAST(create_time AS INTEGER) >= ?")
params.append(int(start_ts))
if end_ts is not None:
where_parts.append("CAST(create_time AS INTEGER) <= ?")
params.append(int(end_ts))
if not include_hidden:
where_parts.append("CAST(is_hidden AS INTEGER) = 0")
if not include_official:
where_parts.append("CAST(is_official AS INTEGER) = 0")
where_sql = " AND ".join(where_parts)
total_row = conn.execute(f"SELECT COUNT(*) AS c FROM message_fts WHERE {where_sql}", params).fetchone()
total = int(total_row[0] or 0) if total_row is not None else 0
rows = conn.execute(
f"""
SELECT
username,
db_stem,
table_name,
local_id
FROM message_fts
WHERE {where_sql}
ORDER BY
CAST(create_time AS INTEGER) DESC,
CAST(sort_seq AS INTEGER) DESC,
CAST(local_id AS INTEGER) DESC
LIMIT ? OFFSET ?
""",
params + [int(limit), int(offset)],
).fetchall()
except Exception as e:
logger.exception("Chat search index query failed")
return {
"status": "index_error",
"account": account_dir.name,
"q": q,
"tokens": tokens,
"scope": "conversation" if username else "global",
"username": username,
"offset": int(offset),
"limit": int(limit),
"baseUrl": base_url,
"total": 0,
"hasMore": False,
"hits": [],
"index": index,
"message": str(e),
}
finally:
conn.close()
db_paths = _iter_message_db_paths(account_dir)
stem_to_path = {p.stem: p for p in db_paths}
groups: dict[tuple[Path, str, str], list[int]] = {}
ordered_keys: list[tuple[Path, str, str, int]] = []
for r in rows:
conv_username = str(r["username"] or "").strip()
db_stem = str(r["db_stem"] or "").strip()
table_name = str(r["table_name"] or "").strip()
local_id = int(r["local_id"] or 0)
if not conv_username or not db_stem or not table_name or local_id <= 0:
continue
db_path = stem_to_path.get(db_stem)
if db_path is None:
continue
groups.setdefault((db_path, table_name, conv_username), []).append(local_id)
ordered_keys.append((db_path, table_name, conv_username, local_id))
hit_by_key: dict[tuple[Path, str, str, int], dict[str, Any]] = {}
for (db_path, table_name, conv_username), local_ids in groups.items():
uniq_local_ids = list(dict.fromkeys([int(x) for x in local_ids if int(x) > 0]))
if not uniq_local_ids:
continue
msg_conn = sqlite3.connect(str(db_path))
msg_conn.row_factory = sqlite3.Row
msg_conn.text_factory = bytes
try:
my_rowid = None
try:
r2 = msg_conn.execute(
"SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1",
(account_dir.name,),
).fetchone()
if r2 is not None and r2[0] is not None:
my_rowid = int(r2[0])
except Exception:
my_rowid = None
placeholders = ",".join(["?"] * len(uniq_local_ids))
quoted_table = _quote_ident(table_name)
sql_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
f"WHERE m.local_id IN ({placeholders})"
)
sql_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
f"WHERE m.local_id IN ({placeholders})"
)
try:
try:
msg_rows = msg_conn.execute(sql_with_join, uniq_local_ids).fetchall()
except Exception:
msg_rows = msg_conn.execute(sql_no_join, uniq_local_ids).fetchall()
except Exception:
continue
is_group = bool(conv_username.endswith("@chatroom"))
for rr in msg_rows:
local_id = int(rr["local_id"] or 0)
if local_id <= 0:
continue
try:
hit = _row_to_search_hit(
rr,
db_path=db_path,
table_name=table_name,
username=conv_username,
account_dir=account_dir,
is_group=is_group,
my_rowid=my_rowid,
)
except Exception:
continue
hay_items = [
str(hit.get("content") or ""),
str(hit.get("title") or ""),
str(hit.get("url") or ""),
str(hit.get("quoteTitle") or ""),
str(hit.get("quoteContent") or ""),
str(hit.get("amount") or ""),
]
haystack = "\n".join([x for x in hay_items if x.strip()])
snippet_src = (
str(hit.get("content") or "").strip()
or str(hit.get("title") or "").strip()
or haystack
)
hit["snippet"] = _make_snippet(snippet_src, tokens)
hit_by_key[(db_path, table_name, conv_username, local_id)] = hit
finally:
msg_conn.close()
hits: list[dict[str, Any]] = []
for k in ordered_keys:
h = hit_by_key.get(k)
if h is not None:
hits.append(h)
scope = "conversation" if username else "global"
if username:
uniq_usernames = list(dict.fromkeys([username] + [str(x.get("senderUsername") or "") for x in hits]))
contact_rows = _load_contact_rows(contact_db_path, uniq_usernames)
local_avatar_usernames = _query_head_image_usernames(head_image_db_path, uniq_usernames)
conv_row = contact_rows.get(username)
conv_name = _pick_display_name(conv_row, username)
conv_avatar = _pick_avatar_url(conv_row)
if (not conv_avatar) and (username in local_avatar_usernames):
conv_avatar = base_url + _build_avatar_url(account_dir.name, username)
for h in hits:
su = str(h.get("senderUsername") or "").strip()
h["conversationName"] = conv_name
h["conversationAvatar"] = conv_avatar
if su:
row = contact_rows.get(su)
h["senderDisplayName"] = (
_pick_display_name(row, su)
if row is not None
else (conv_name if su == username else su)
)
avatar_url = _pick_avatar_url(row)
if (not avatar_url) and (su in local_avatar_usernames):
avatar_url = base_url + _build_avatar_url(account_dir.name, su)
h["senderAvatar"] = avatar_url
else:
uniq_contacts = list(
dict.fromkeys(
[str(x.get("username") or "") for x in hits] + [str(x.get("senderUsername") or "") for x in hits]
)
)
contact_rows = _load_contact_rows(contact_db_path, uniq_contacts)
local_avatar_usernames = _query_head_image_usernames(head_image_db_path, uniq_contacts)
for h in hits:
cu = str(h.get("username") or "").strip()
su = str(h.get("senderUsername") or "").strip()
crow = contact_rows.get(cu)
conv_name = _pick_display_name(crow, cu) if cu else ""
h["conversationName"] = conv_name or cu
conv_avatar = _pick_avatar_url(crow)
if (not conv_avatar) and cu and (cu in local_avatar_usernames):
conv_avatar = base_url + _build_avatar_url(account_dir.name, cu)
h["conversationAvatar"] = conv_avatar
if su:
row = contact_rows.get(su)
h["senderDisplayName"] = (
_pick_display_name(row, su) if row is not None else (conv_name if su == cu else su)
)
avatar_url = _pick_avatar_url(row)
if (not avatar_url) and (su in local_avatar_usernames):
avatar_url = base_url + _build_avatar_url(account_dir.name, su)
h["senderAvatar"] = avatar_url
return {
"status": "success",
"account": account_dir.name,
"scope": scope,
"username": username,
"q": q,
"tokens": tokens,
"offset": int(offset),
"limit": int(limit),
"baseUrl": base_url,
"total": int(total),
"hasMore": bool(int(offset) + int(limit) < int(total)),
"index": index,
"hits": hits,
}
@router.get("/api/chat/search", summary="搜索聊天记录(消息)")
async def search_chat_messages(
request: Request,
q: str,
account: Optional[str] = None,
username: Optional[str] = None,
sender: Optional[str] = None,
session_type: Optional[str] = None,
limit: int = 50,
offset: int = 0,
start_time: Optional[int] = None,
end_time: Optional[int] = None,
render_types: Optional[str] = None,
include_hidden: bool = False,
include_official: bool = False,
session_limit: int = 200,
per_chat_scan: int = 200,
scan_limit: int = 20000,
):
return await _search_chat_messages_via_fts(
request,
q=q,
account=account,
username=username,
sender=sender,
session_type=session_type,
limit=limit,
offset=offset,
start_time=start_time,
end_time=end_time,
render_types=render_types,
include_hidden=include_hidden,
include_official=include_official,
)
tokens = _make_search_tokens(q)
if not tokens:
raise HTTPException(status_code=400, detail="Missing q.")
if limit <= 0:
raise HTTPException(status_code=400, detail="Invalid limit.")
if limit > 200:
limit = 200
if offset < 0:
offset = 0
if session_limit <= 0:
session_limit = 200
if session_limit > 2000:
session_limit = 2000
if per_chat_scan <= 0:
per_chat_scan = 200
if per_chat_scan > 5000:
per_chat_scan = 5000
if scan_limit <= 0:
scan_limit = 20000
if scan_limit > 200000:
scan_limit = 200000
start_ts = int(start_time) if start_time is not None else None
end_ts = int(end_time) if end_time is not None else None
if start_ts is not None and start_ts < 0:
start_ts = 0
if end_ts is not None and end_ts < 0:
end_ts = 0
want_types: Optional[set[str]] = None
if render_types is not None:
parts = [p.strip() for p in str(render_types or "").split(",") if p.strip()]
want_types = {p for p in parts if p}
if not want_types:
want_types = None
account_dir = _resolve_account_dir(account)
db_paths = _iter_message_db_paths(account_dir)
contact_db_path = account_dir / "contact.db"
session_db_path = account_dir / "session.db"
if not db_paths:
return {
"status": "error",
"account": account_dir.name,
"q": q,
"hits": [],
"hasMore": False,
"scanLimited": False,
"scannedMessages": 0,
"message": "No message databases found for this account.",
}
def build_haystack(hit: dict[str, Any]) -> str:
items = [
str(hit.get("content") or ""),
str(hit.get("title") or ""),
str(hit.get("url") or ""),
str(hit.get("quoteTitle") or ""),
str(hit.get("quoteContent") or ""),
str(hit.get("amount") or ""),
]
return "\n".join([x for x in items if x.strip()])
def scan_conversation(conv_username: str, *, per_db_limit: int, max_hits: Optional[int] = None) -> tuple[list[dict[str, Any]], int, bool]:
is_group = bool(conv_username.endswith("@chatroom"))
scanned = 0
truncated = False
hits: list[dict[str, Any]] = []
seen_ids: set[str] = set()
for db_path in db_paths:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
try:
table_name = _resolve_msg_table_name(conn, conv_username)
if not table_name:
continue
my_wxid = account_dir.name
my_rowid = None
try:
r2 = conn.execute(
"SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1",
(my_wxid,),
).fetchone()
if r2 is not None:
my_rowid = int(r2[0])
except Exception:
my_rowid = None
where_parts: list[str] = []
params: list[Any] = []
if start_ts is not None:
where_parts.append("m.create_time >= ?")
params.append(int(start_ts))
if end_ts is not None:
where_parts.append("m.create_time <= ?")
params.append(int(end_ts))
where_sql = ""
if where_parts:
where_sql = "WHERE " + " AND ".join(where_parts)
quoted_table = _quote_ident(table_name)
sql_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
f"{where_sql} "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
sql_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
f"{where_sql} "
"ORDER BY m.create_time DESC, m.sort_seq DESC, m.local_id DESC "
"LIMIT ?"
)
conn.text_factory = bytes
per_db_probe = int(per_db_limit) + 1
params_probe = list(params) + [per_db_probe]
try:
rows = conn.execute(sql_with_join, params_probe).fetchall()
except Exception:
rows = conn.execute(sql_no_join, params_probe).fetchall()
if len(rows) > per_db_limit:
truncated = True
rows = rows[:per_db_limit]
scanned += len(rows)
for rr in rows:
hit = _row_to_search_hit(
rr,
db_path=db_path,
table_name=table_name,
username=conv_username,
account_dir=account_dir,
is_group=is_group,
my_rowid=my_rowid,
)
if want_types is not None and str(hit.get("renderType") or "") not in want_types:
continue
haystack = build_haystack(hit)
if not _match_tokens(haystack, tokens):
continue
mid = str(hit.get("id") or "")
if not mid or mid in seen_ids:
continue
seen_ids.add(mid)
snippet_src = str(hit.get("content") or "").strip() or str(hit.get("title") or "").strip() or haystack
hit["snippet"] = _make_snippet(snippet_src, tokens)
hits.append(hit)
if max_hits is not None and len(hits) >= int(max_hits):
return hits, scanned, True
finally:
conn.close()
return hits, scanned, truncated
base_url = str(request.base_url).rstrip("/")
hits: list[dict[str, Any]] = []
scanned_messages = 0
scan_limited = False
if username:
conv_hits, scanned, truncated = scan_conversation(username, per_db_limit=scan_limit)
scanned_messages = scanned
scan_limited = bool(truncated)
conv_hits.sort(
key=lambda h: (
int(h.get("createTime") or 0),
int(h.get("sortSeq") or 0),
int(h.get("localId") or 0),
),
reverse=True,
)
total_in_scan = len(conv_hits)
page = conv_hits[int(offset) : int(offset) + int(limit)]
uniq_usernames = list(dict.fromkeys([username] + [str(x.get("senderUsername") or "") for x in page]))
contact_rows = _load_contact_rows(contact_db_path, uniq_usernames)
conv_row = contact_rows.get(username)
conv_name = _pick_display_name(conv_row, username)
for h in page:
su = str(h.get("senderUsername") or "").strip()
h["conversationName"] = conv_name
if su:
row = contact_rows.get(su)
h["senderDisplayName"] = _pick_display_name(row, su) if row is not None else (conv_name if su == username else su)
return {
"status": "success",
"account": account_dir.name,
"scope": "conversation",
"username": username,
"q": q,
"tokens": tokens,
"offset": int(offset),
"limit": int(limit),
"totalInScan": total_in_scan,
"hasMore": bool((int(offset) + int(limit) < total_in_scan) or scan_limited),
"scanLimited": bool(scan_limited),
"scannedMessages": int(scanned_messages),
"hits": page,
}
# Global: scan recent conversations (session.db), then keep only top K newest hits within the scanned window.
if not session_db_path.exists():
raise HTTPException(status_code=404, detail="session.db not found for this account.")
sconn = sqlite3.connect(str(session_db_path))
sconn.row_factory = sqlite3.Row
try:
rows = sconn.execute(
"""
SELECT
username,
is_hidden,
sort_timestamp,
last_timestamp
FROM SessionTable
ORDER BY sort_timestamp DESC
LIMIT ?
""",
(int(session_limit),),
).fetchall()
finally:
sconn.close()
conv_usernames: list[str] = []
for r in rows:
u = str(r["username"] or "").strip()
if not u:
continue
if not include_hidden and int(r["is_hidden"] or 0) == 1:
continue
if not _should_keep_session(u, include_official=include_official):
continue
conv_usernames.append(u)
top_k = min(5000, int(offset) + int(limit) + 2000)
heap: list[tuple[tuple[int, int, int], dict[str, Any]]] = []
for conv in conv_usernames:
conv_hits, scanned, truncated = scan_conversation(conv, per_db_limit=per_chat_scan, max_hits=50)
scanned_messages += int(scanned)
if truncated:
scan_limited = True
for h in conv_hits:
k = (
int(h.get("createTime") or 0),
int(h.get("sortSeq") or 0),
int(h.get("localId") or 0),
)
heapq.heappush(heap, (k, h))
if len(heap) > top_k:
heapq.heappop(heap)
heap.sort(key=lambda x: x[0], reverse=True)
hits_all = [x[1] for x in heap]
total_in_scan = len(hits_all)
page = hits_all[int(offset) : int(offset) + int(limit)]
uniq_contacts = list(
dict.fromkeys(
[str(x.get("username") or "") for x in page] + [str(x.get("senderUsername") or "") for x in page]
)
)
contact_rows = _load_contact_rows(contact_db_path, uniq_contacts)
for h in page:
cu = str(h.get("username") or "").strip()
su = str(h.get("senderUsername") or "").strip()
crow = contact_rows.get(cu)
conv_name = _pick_display_name(crow, cu) if cu else ""
h["conversationName"] = conv_name or cu
if su:
row = contact_rows.get(su)
h["senderDisplayName"] = _pick_display_name(row, su) if row is not None else (conv_name if su == cu else su)
return {
"status": "success",
"account": account_dir.name,
"scope": "global",
"q": q,
"tokens": tokens,
"offset": int(offset),
"limit": int(limit),
"baseUrl": base_url,
"totalInScan": total_in_scan,
"hasMore": bool((int(offset) + int(limit) < total_in_scan) or scan_limited or (total_in_scan >= top_k)),
"scanLimited": bool(scan_limited),
"scannedMessages": int(scanned_messages),
"conversationsScanned": len(conv_usernames),
"hits": page,
}
@router.get("/api/chat/messages/around", summary="定位到某条消息并返回上下文")
async def get_chat_messages_around(
request: Request,
username: str,
anchor_id: str,
account: Optional[str] = None,
before: int = 20,
after: int = 20,
):
if not username:
raise HTTPException(status_code=400, detail="Missing username.")
if not anchor_id:
raise HTTPException(status_code=400, detail="Missing anchor_id.")
if before < 0:
before = 0
if after < 0:
after = 0
if before > 200:
before = 200
if after > 200:
after = 200
parts = str(anchor_id).split(":", 2)
if len(parts) != 3:
raise HTTPException(status_code=400, detail="Invalid anchor_id.")
anchor_db_stem, anchor_table_name, anchor_local_id_str = parts
try:
anchor_local_id = int(anchor_local_id_str)
except Exception:
raise HTTPException(status_code=400, detail="Invalid anchor_id.")
account_dir = _resolve_account_dir(account)
db_paths = _iter_message_db_paths(account_dir)
contact_db_path = account_dir / "contact.db"
head_image_db_path = account_dir / "head_image.db"
message_resource_db_path = account_dir / "message_resource.db"
base_url = str(request.base_url).rstrip("/")
target_db: Optional[Path] = None
for p in db_paths:
if p.stem == anchor_db_stem:
target_db = p
break
if target_db is None:
raise HTTPException(status_code=404, detail="Anchor database not found.")
resource_conn: Optional[sqlite3.Connection] = None
resource_chat_id: Optional[int] = None
try:
if message_resource_db_path.exists():
resource_conn = sqlite3.connect(str(message_resource_db_path))
resource_conn.row_factory = sqlite3.Row
resource_chat_id = _resource_lookup_chat_id(resource_conn, username)
except Exception:
if resource_conn is not None:
try:
resource_conn.close()
except Exception:
pass
resource_conn = None
resource_chat_id = None
conn = sqlite3.connect(str(target_db))
conn.row_factory = sqlite3.Row
try:
table_name = str(anchor_table_name).strip()
if not table_name:
raise HTTPException(status_code=404, detail="Anchor table not found.")
# Normalize table name casing if needed
try:
trows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
lower_to_actual = {str(x[0]).lower(): str(x[0]) for x in trows if x and x[0]}
table_name = lower_to_actual.get(table_name.lower(), table_name)
except Exception:
pass
my_wxid = account_dir.name
my_rowid = None
try:
r2 = conn.execute(
"SELECT rowid FROM Name2Id WHERE user_name = ? LIMIT 1",
(my_wxid,),
).fetchone()
if r2 is not None:
my_rowid = int(r2[0])
except Exception:
my_rowid = None
quoted_table = _quote_ident(table_name)
sql_anchor_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
"WHERE m.local_id = ? "
"LIMIT 1"
)
sql_anchor_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
"WHERE m.local_id = ? "
"LIMIT 1"
)
conn.text_factory = bytes
try:
anchor_row = conn.execute(sql_anchor_with_join, (anchor_local_id,)).fetchone()
except Exception:
anchor_row = conn.execute(sql_anchor_no_join, (anchor_local_id,)).fetchone()
if anchor_row is None:
raise HTTPException(status_code=404, detail="Anchor message not found.")
anchor_ct = int(anchor_row["create_time"] or 0)
anchor_ss = int(anchor_row["sort_seq"] or 0) if anchor_row["sort_seq"] is not None else 0
where_before = (
"WHERE ("
"m.create_time < ? "
"OR (m.create_time = ? AND COALESCE(m.sort_seq, 0) < ?) "
"OR (m.create_time = ? AND COALESCE(m.sort_seq, 0) = ? AND m.local_id <= ?)"
")"
)
where_after = (
"WHERE ("
"m.create_time > ? "
"OR (m.create_time = ? AND COALESCE(m.sort_seq, 0) > ?) "
"OR (m.create_time = ? AND COALESCE(m.sort_seq, 0) = ? AND m.local_id >= ?)"
")"
)
sql_before_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
f"{where_before} "
"ORDER BY m.create_time DESC, COALESCE(m.sort_seq, 0) DESC, m.local_id DESC "
"LIMIT ?"
)
sql_before_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
f"{where_before} "
"ORDER BY m.create_time DESC, COALESCE(m.sort_seq, 0) DESC, m.local_id DESC "
"LIMIT ?"
)
sql_after_with_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, n.user_name AS sender_username "
f"FROM {quoted_table} m "
"LEFT JOIN Name2Id n ON m.real_sender_id = n.rowid "
f"{where_after} "
"ORDER BY m.create_time ASC, COALESCE(m.sort_seq, 0) ASC, m.local_id ASC "
"LIMIT ?"
)
sql_after_no_join = (
"SELECT "
"m.local_id, m.server_id, m.local_type, m.sort_seq, m.real_sender_id, m.create_time, "
"m.message_content, m.compress_content, '' AS sender_username "
f"FROM {quoted_table} m "
f"{where_after} "
"ORDER BY m.create_time ASC, COALESCE(m.sort_seq, 0) ASC, m.local_id ASC "
"LIMIT ?"
)
params_before = (anchor_ct, anchor_ct, anchor_ss, anchor_ct, anchor_ss, anchor_local_id, int(before) + 1)
params_after = (anchor_ct, anchor_ct, anchor_ss, anchor_ct, anchor_ss, anchor_local_id, int(after) + 1)
try:
before_rows = conn.execute(sql_before_with_join, params_before).fetchall()
except Exception:
before_rows = conn.execute(sql_before_no_join, params_before).fetchall()
try:
after_rows = conn.execute(sql_after_with_join, params_after).fetchall()
except Exception:
after_rows = conn.execute(sql_after_no_join, params_after).fetchall()
seen_ids: set[str] = set()
combined: list[sqlite3.Row] = []
for rr in list(before_rows) + list(after_rows):
lid = int(rr["local_id"] or 0)
mid = f"{target_db.stem}:{table_name}:{lid}"
if mid in seen_ids:
continue
seen_ids.add(mid)
combined.append(rr)
merged: list[dict[str, Any]] = []
sender_usernames: list[str] = []
quote_usernames: list[str] = []
pat_usernames: set[str] = set()
is_group = bool(username.endswith("@chatroom"))
_append_full_messages_from_rows(
merged=merged,
sender_usernames=sender_usernames,
quote_usernames=quote_usernames,
pat_usernames=pat_usernames,
rows=combined,
db_path=target_db,
table_name=table_name,
username=username,
account_dir=account_dir,
is_group=is_group,
my_rowid=my_rowid,
resource_conn=resource_conn,
resource_chat_id=resource_chat_id,
)
return_messages = merged
finally:
conn.close()
if resource_conn is not None:
try:
resource_conn.close()
except Exception:
pass
_postprocess_full_messages(
merged=return_messages,
sender_usernames=sender_usernames,
quote_usernames=quote_usernames,
pat_usernames=pat_usernames,
account_dir=account_dir,
username=username,
base_url=base_url,
contact_db_path=contact_db_path,
head_image_db_path=head_image_db_path,
)
def sort_key(m: dict[str, Any]) -> tuple[int, int, int]:
sseq = int(m.get("sortSeq") or 0)
cts = int(m.get("createTime") or 0)
lid = int(m.get("localId") or 0)
return (cts, sseq, lid)
return_messages.sort(key=sort_key, reverse=False)
anchor_index = -1
for i, m in enumerate(return_messages):
if str(m.get("id") or "") == str(anchor_id):
anchor_index = i
break
return {
"status": "success",
"account": account_dir.name,
"username": username,
"anchorId": anchor_id,
"anchorIndex": anchor_index,
"messages": return_messages,
}