feat(chat): 聊天页支持日历定位/卡片解析/HTML导出分页

- 新增 /api/chat/messages/daily_counts 与 /api/chat/messages/anchor,用于月度热力图与按日/首条定位
- messages/around 支持跨 message 分片定位,定位更稳定
- 新增 /api/chat/chat_history/resolve 与 /api/chat/appmsg/resolve,合并转发/链接卡片可按 server_id 补全
- 新增 /api/chat/media/favicon,并补齐 link 本地缩略图处理
- HTML 导出支持分页加载(html_page_size),避免大聊天单文件卡顿
- tests: 覆盖 heatmap/anchor、favicon 缓存、HTML 分页导出
This commit is contained in:
2977094657
2026-02-15 14:32:47 +08:00
parent 31d98abddf
commit bd44601611
10 changed files with 4375 additions and 302 deletions

View File

@@ -0,0 +1,221 @@
import os
import json
import hashlib
import sqlite3
import sys
import unittest
import zipfile
import importlib
from pathlib import Path
from tempfile import TemporaryDirectory
# ROOT is the directory one level above the folder containing this file.
# Its "src" subdirectory is put first on sys.path; presumably that is where
# the wechat_decrypt_tool package imported by the tests lives (TODO confirm).
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT / "src"))
class TestChatExportHtmlPaging(unittest.TestCase):
    """Exercise the paged HTML export against a freshly seeded, throwaway account."""

    def _reload_export_modules(self):
        """Reload the export-related modules and return ``chat_export_service``.

        The test calls this right after pointing ``WECHAT_TOOL_DATA_DIR`` at a
        temporary directory; presumably the reload lets module-level state pick
        up that value (TODO confirm against the modules themselves).
        """
        import wechat_decrypt_tool.app_paths as app_paths
        import wechat_decrypt_tool.chat_helpers as chat_helpers
        import wechat_decrypt_tool.media_helpers as media_helpers
        import wechat_decrypt_tool.chat_export_service as chat_export_service

        # Reload in the same order the modules are imported above.
        for module in (app_paths, chat_helpers, media_helpers, chat_export_service):
            importlib.reload(module)
        return chat_export_service

    def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None:
        """Create ``contact`` and ``stranger`` tables at *path* and add two contacts.

        Only ``contact`` receives rows: one for *account* (nick name "Me") and
        one for *username* (nick name "Friend"). ``stranger`` stays empty.
        """
        ddl_statements = (
            """
            CREATE TABLE contact (
                username TEXT,
                remark TEXT,
                nick_name TEXT,
                alias TEXT,
                local_type INTEGER,
                verify_flag INTEGER,
                big_head_url TEXT,
                small_head_url TEXT
            )
            """,
            """
            CREATE TABLE stranger (
                username TEXT,
                remark TEXT,
                nick_name TEXT,
                alias TEXT,
                local_type INTEGER,
                verify_flag INTEGER,
                big_head_url TEXT,
                small_head_url TEXT
            )
            """,
        )
        contact_rows = [
            (account, "", "Me", "", 1, 0, "", ""),
            (username, "", "Friend", "", 1, 0, "", ""),
        ]
        db = sqlite3.connect(str(path))
        try:
            for ddl in ddl_statements:
                db.execute(ddl)
            db.executemany(
                "INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                contact_rows,
            )
            db.commit()
        finally:
            db.close()

    def _seed_session_db(self, path: Path, *, username: str) -> None:
        """Create ``SessionTable`` at *path* with one visible session for *username*."""
        session_row = (username, 0, 1735689600)
        db = sqlite3.connect(str(path))
        try:
            db.execute(
                """
                CREATE TABLE SessionTable (
                    username TEXT,
                    is_hidden INTEGER,
                    sort_timestamp INTEGER
                )
                """
            )
            db.execute("INSERT INTO SessionTable VALUES (?, ?, ?)", session_row)
            db.commit()
        finally:
            db.close()

    def _seed_message_db(self, path: Path, *, account: str, username: str, total: int) -> None:
        """Create a message database at *path* holding *total* text messages.

        ``Name2Id`` maps rowid 1 to *account* and rowid 2 to *username*. The
        message table is named ``msg_<md5(username)>``. Message ``i`` (1-based)
        has content ``MSG<i, zero-padded to 4 digits>``; even ``i`` uses sender
        id 1, odd ``i`` uses sender id 2.
        """
        db = sqlite3.connect(str(path))
        try:
            db.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)")
            for rowid, name in ((1, account), (2, username)):
                db.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (rowid, name))

            table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}"
            db.execute(
                f"""
                CREATE TABLE {table_name} (
                    local_id INTEGER,
                    server_id INTEGER,
                    local_type INTEGER,
                    sort_seq INTEGER,
                    real_sender_id INTEGER,
                    create_time INTEGER,
                    message_content TEXT,
                    compress_content BLOB
                )
                """
            )

            # Generate lots of plain text messages with unique markers.
            base_ts = 1735689600
            rows = [
                (
                    seq,
                    100000 + seq,
                    1,
                    seq,
                    1 if seq % 2 == 0 else 2,
                    base_ts + seq,
                    f"MSG{seq:04d}",
                    None,
                )
                for seq in range(1, total + 1)
            ]
            db.executemany(
                f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                rows,
            )
            db.commit()
        finally:
            db.close()

    def _prepare_account(self, root: Path, *, account: str, username: str, total: int) -> Path:
        """Build ``<root>/output/databases/<account>`` with seeded databases.

        Writes ``contact.db``, ``session.db`` and ``message_0.db`` and returns
        the account directory.
        """
        account_dir = root / "output" / "databases" / account
        account_dir.mkdir(parents=True, exist_ok=True)

        self._seed_contact_db(account_dir / "contact.db", account=account, username=username)
        self._seed_session_db(account_dir / "session.db", username=username)
        self._seed_message_db(
            account_dir / "message_0.db",
            account=account,
            username=username,
            total=total,
        )
        return account_dir

    def _create_job(self, manager, *, account: str, username: str, html_page_size: int):
        """Start an HTML export job on *manager* and poll until it finishes.

        Returns the latest job object once its status is ``done``, ``error`` or
        ``cancelled``. Fails the test if that does not happen within the
        polling budget.
        """
        import time as _time

        job = manager.create_job(
            account=account,
            scope="selected",
            usernames=[username],
            export_format="html",
            start_time=None,
            end_time=None,
            include_hidden=False,
            include_official=False,
            include_media=False,
            media_kinds=[],
            message_types=[],
            output_dir=None,
            allow_process_key_extract=False,
            download_remote_media=False,
            html_page_size=html_page_size,
            privacy_mode=False,
            file_name=None,
        )

        # Export is async (thread). Allow enough time for a few thousand messages + zip writes:
        # 600 polls spaced 0.05 s apart.
        terminal_states = {"done", "error", "cancelled"}
        for _ in range(600):
            latest = manager.get_job(job.export_id)
            if latest and latest.status in terminal_states:
                return latest
            _time.sleep(0.05)
        self.fail("export job did not finish in time")

    def test_html_export_paging_inlines_latest_page_only(self):
        """With 2300 messages and a page size of 1000, only the newest page is inlined.

        ``messages.html`` must contain the paging UI and the last message but
        not the first one; the first message must be in ``pages/page-0001.js``.
        """
        account = "wxid_test"
        username = "wxid_friend"
        total_messages = 2300
        page_size = 1000

        with TemporaryDirectory() as td:
            root = Path(td)
            self._prepare_account(root, account=account, username=username, total=total_messages)

            prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR")
            try:
                os.environ["WECHAT_TOOL_DATA_DIR"] = str(root)
                svc = self._reload_export_modules()
                job = self._create_job(
                    svc.CHAT_EXPORT_MANAGER,
                    account=account,
                    username=username,
                    html_page_size=page_size,
                )
                self.assertEqual(job.status, "done", msg=job.error)
                self.assertTrue(job.zip_path and job.zip_path.exists())

                with zipfile.ZipFile(job.zip_path, "r") as zf:
                    names = set(zf.namelist())
                    html_path = next((n for n in names if n.endswith("/messages.html")), "")
                    self.assertTrue(html_path, msg="missing messages.html")
                    html_text = zf.read(html_path).decode("utf-8", errors="ignore")

                    # Paging UI + meta should exist for multi-page exports.
                    for marker in (
                        'id="wcePageMeta"',
                        'id="wcePager"',
                        'id="wceMessageList"',
                        'id="wceLoadPrevBtn"',
                    ):
                        self.assertIn(marker, html_text)

                    # Latest page is inlined; earliest page should not be present in messages.html.
                    self.assertIn("MSG2300", html_text)
                    self.assertNotIn("MSG0001", html_text)

                    conv_dir = html_path.rsplit("/", 1)[0]
                    page1_js = f"{conv_dir}/pages/page-0001.js"
                    self.assertIn(page1_js, names)
                    page1_text = zf.read(page1_js).decode("utf-8", errors="ignore")
                    self.assertIn("MSG0001", page1_text)
            finally:
                # Put the environment variable back exactly as it was found.
                if prev_data is None:
                    os.environ.pop("WECHAT_TOOL_DATA_DIR", None)
                else:
                    os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data

View File

@@ -0,0 +1,133 @@
import os
import sqlite3
import sys
import unittest
import importlib
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
# ROOT is the directory one level above the folder containing this file.
# Its "src" subdirectory is put first on sys.path; presumably that is where
# the wechat_decrypt_tool package imported by the test lives (TODO confirm).
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT / "src"))
class _FakeResponse:
def __init__(self, *, status_code: int = 200, headers: dict | None = None, url: str = "", body: bytes = b""):
self.status_code = int(status_code)
self.headers = dict(headers or {})
self.url = str(url or "")
self._body = bytes(body or b"")
def iter_content(self, chunk_size: int = 64 * 1024):
yield self._body
def close(self) -> None:
return None
class TestChatMediaFavicon(unittest.TestCase):
    """Check that ``/api/chat/media/favicon`` serves a downloaded icon and caches it."""

    def test_chat_media_favicon_caches(self):
        """Two identical favicon requests must trigger exactly one ``requests.get``.

        ``requests.head`` and ``requests.get`` are patched on the router
        module. The test also inspects the on-disk cache database for a
        ``url`` entry pointing at the favicon.
        """
        from fastapi import FastAPI
        from fastapi.testclient import TestClient

        # 1x1 PNG (same as other avatar cache tests)
        png = bytes.fromhex(
            "89504E470D0A1A0A"
            "0000000D49484452000000010000000108060000001F15C489"
            "0000000D49444154789C6360606060000000050001A5F64540"
            "0000000049454E44AE426082"
        )

        def fake_head(url, **_kwargs):
            # Pretend short-link resolves to bilibili.
            return _FakeResponse(
                status_code=200,
                headers={},
                url="https://www.bilibili.com/video/BV1Au4tzNEq2",
                body=b"",
            )

        def fake_get(url, **_kwargs):
            # Only the bilibili favicon URL succeeds; everything else is a 404.
            target = str(url or "")
            if "www.bilibili.com/favicon.ico" not in target:
                return _FakeResponse(
                    status_code=404,
                    headers={"Content-Type": "text/html"},
                    url=target,
                    body=b"",
                )
            return _FakeResponse(
                status_code=200,
                headers={"Content-Type": "image/png", "content-length": str(len(png))},
                url=target,
                body=png,
            )

        env_names = ("WECHAT_TOOL_DATA_DIR", "WECHAT_TOOL_AVATAR_CACHE_ENABLED")

        with TemporaryDirectory() as td:
            root = Path(td)
            # Remember the current values so the finally block can restore them.
            saved_env = {name: os.environ.get(name) for name in env_names}
            try:
                os.environ["WECHAT_TOOL_DATA_DIR"] = str(root)
                os.environ["WECHAT_TOOL_AVATAR_CACHE_ENABLED"] = "1"

                import wechat_decrypt_tool.app_paths as app_paths
                import wechat_decrypt_tool.avatar_cache as avatar_cache
                import wechat_decrypt_tool.routers.chat_media as chat_media

                for module in (app_paths, avatar_cache, chat_media):
                    importlib.reload(module)

                app = FastAPI()
                app.include_router(chat_media.router)
                client = TestClient(app)
                request_params = {"url": "https://b23.tv/au68guF"}

                with patch("wechat_decrypt_tool.routers.chat_media.requests.head", side_effect=fake_head) as mock_head, patch(
                    "wechat_decrypt_tool.routers.chat_media.requests.get", side_effect=fake_get
                ) as mock_get:
                    first = client.get("/api/chat/media/favicon", params=request_params)
                    self.assertEqual(first.status_code, 200)
                    self.assertTrue(first.headers.get("content-type", "").startswith("image/"))
                    self.assertEqual(first.content, png)

                    # Second call should hit disk cache (no extra favicon download).
                    second = client.get("/api/chat/media/favicon", params=request_params)
                    self.assertEqual(second.status_code, 200)
                    self.assertEqual(second.content, png)

                    self.assertGreaterEqual(mock_head.call_count, 1)
                    self.assertEqual(mock_get.call_count, 1)

                    cache_db = root / "output" / "avatar_cache" / "favicon" / "avatar_cache.db"
                    self.assertTrue(cache_db.exists())

                    db = sqlite3.connect(str(cache_db))
                    try:
                        row = db.execute(
                            "SELECT source_kind, source_url, media_type FROM avatar_cache_entries WHERE source_kind = 'url' LIMIT 1"
                        ).fetchone()
                        self.assertIsNotNone(row)
                        source_kind, source_url, media_type = row
                        self.assertEqual(str(source_kind or ""), "url")
                        self.assertIn("favicon.ico", str(source_url or ""))
                        self.assertTrue(str(media_type or "").startswith("image/"))
                    finally:
                        db.close()
            finally:
                for name, previous in saved_env.items():
                    if previous is None:
                        os.environ.pop(name, None)
                    else:
                        os.environ[name] = previous
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,292 @@
import hashlib
import sqlite3
import sys
import unittest
from datetime import datetime
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
# ROOT is the directory one level above the folder containing this file.
# Its "src" subdirectory is put first on sys.path before the
# wechat_decrypt_tool import that follows.
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT / "src"))
from wechat_decrypt_tool.routers import chat as chat_router
def _msg_table_name(username: str) -> str:
    """Return ``Msg_`` followed by the MD5 hex digest of *username* (UTF-8 encoded)."""
    digest = hashlib.md5(username.encode("utf-8"))
    return "Msg_" + digest.hexdigest()
def _seed_message_db(path: Path, *, username: str, rows: list[tuple[int, int]]) -> None:
    """Create the message table for *username* at *path* and insert *rows*.

    Each row is ``(create_time, sort_seq)``. ``local_id`` is an
    ``INTEGER PRIMARY KEY AUTOINCREMENT`` column, so SQLite assigns it in
    insertion order.
    """
    table = _msg_table_name(username)
    db = sqlite3.connect(str(path))
    try:
        db.execute(
            f"""
            CREATE TABLE "{table}"(
                local_id INTEGER PRIMARY KEY AUTOINCREMENT,
                create_time INTEGER,
                sort_seq INTEGER
            )
            """
        )
        # Values are coerced to int lazily, one row at a time, as they are inserted.
        db.executemany(
            f'INSERT INTO "{table}"(create_time, sort_seq) VALUES (?, ?)',
            ((int(create_time), int(sort_seq)) for create_time, sort_seq in rows),
        )
        db.commit()
    finally:
        db.close()
def _seed_message_db_full(path: Path, *, username: str, rows: list[tuple[int, int, str]]) -> None:
    """Create a fuller message table for *username* at *path* and insert *rows*.

    Each row is ``(create_time, sort_seq, text)``; this is the minimal schema
    for /api/chat/messages/around. Every inserted message uses
    ``server_id=0``, ``local_type=1``, ``real_sender_id=0`` and a NULL
    ``compress_content``.
    """
    table = _msg_table_name(username)
    db = sqlite3.connect(str(path))
    try:
        db.execute(
            f"""
            CREATE TABLE "{table}"(
                local_id INTEGER PRIMARY KEY AUTOINCREMENT,
                server_id INTEGER,
                local_type INTEGER,
                sort_seq INTEGER,
                real_sender_id INTEGER,
                create_time INTEGER,
                message_content TEXT,
                compress_content BLOB
            )
            """
        )
        # Parameters are built lazily, one row at a time, as they are inserted.
        db.executemany(
            f'INSERT INTO "{table}"(server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) '
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                (0, 1, int(sort_seq), 0, int(create_time), str(text), None)
                for create_time, sort_seq, text in rows
            ),
        )
        db.commit()
    finally:
        db.close()
def _seed_contact_db_minimal(path: Path) -> None:
    """Create empty ``contact`` and ``stranger`` tables in the database at *path*.

    Both tables share the same six text columns; no rows are inserted.
    """
    ddl_statements = (
        """
        CREATE TABLE contact (
            username TEXT,
            remark TEXT,
            nick_name TEXT,
            alias TEXT,
            big_head_url TEXT,
            small_head_url TEXT
        )
        """,
        """
        CREATE TABLE stranger (
            username TEXT,
            remark TEXT,
            nick_name TEXT,
            alias TEXT,
            big_head_url TEXT,
            small_head_url TEXT
        )
        """,
    )
    db = sqlite3.connect(str(path))
    try:
        for ddl in ddl_statements:
            db.execute(ddl)
        db.commit()
    finally:
        db.close()
class TestChatMessageCalendarHeatmap(unittest.TestCase):
    """Tests for the chat router's daily-count, anchor and around message endpoints.

    Every test seeds a temporary account directory and patches
    ``chat_router._resolve_account_dir`` to return it.
    """

    def test_daily_counts_aggregates_per_day_and_respects_month_range(self):
        """February counts are grouped per day and exclude the January message."""
        username = "wxid_test_user"
        ts_jan31_23 = int(datetime(2026, 1, 31, 23, 0, 0).timestamp())
        ts_feb01_10 = int(datetime(2026, 2, 1, 10, 0, 0).timestamp())
        ts_feb14_12 = int(datetime(2026, 2, 14, 12, 0, 0).timestamp())
        seeded_rows = [
            (ts_jan31_23, 0),
            (ts_feb01_10, 5),
            (ts_feb01_10, 2),
            (ts_feb14_12, 0),
        ]

        with TemporaryDirectory() as td:
            account_dir = Path(td) / "acc"
            account_dir.mkdir(parents=True, exist_ok=True)
            _seed_message_db(account_dir / "message.db", username=username, rows=seeded_rows)

            with patch.object(chat_router, "_resolve_account_dir", return_value=account_dir):
                resp = chat_router.get_chat_message_daily_counts(
                    username=username,
                    year=2026,
                    month=2,
                    account="acc",
                )

            for key, expected in (
                ("status", "success"),
                ("username", username),
                ("year", 2026),
                ("month", 2),
            ):
                self.assertEqual(resp.get(key), expected)

            counts = resp.get("counts") or {}
            self.assertEqual(counts.get("2026-02-01"), 2)
            self.assertEqual(counts.get("2026-02-14"), 1)
            self.assertIsNone(counts.get("2026-01-31"))
            self.assertEqual(resp.get("total"), 3)
            self.assertEqual(resp.get("max"), 2)

    def test_anchor_day_picks_earliest_by_create_time_then_sort_seq_then_local_id(self):
        """For equal create_time on the requested day, the smaller sort_seq wins."""
        username = "wxid_test_user"
        ts_jan31_23 = int(datetime(2026, 1, 31, 23, 0, 0).timestamp())
        ts_feb01_10 = int(datetime(2026, 2, 1, 10, 0, 0).timestamp())

        with TemporaryDirectory() as td:
            account_dir = Path(td) / "acc"
            account_dir.mkdir(parents=True, exist_ok=True)
            _seed_message_db(
                account_dir / "message.db",
                username=username,
                rows=[
                    (ts_jan31_23, 0),  # local_id = 1
                    (ts_feb01_10, 5),  # local_id = 2
                    (ts_feb01_10, 2),  # local_id = 3 <- expected (sort_seq smaller)
                ],
            )

            with patch.object(chat_router, "_resolve_account_dir", return_value=account_dir):
                resp = chat_router.get_chat_message_anchor(
                    username=username,
                    kind="day",
                    account="acc",
                    date="2026-02-01",
                )

            self.assertEqual(resp.get("status"), "success")
            self.assertEqual(resp.get("kind"), "day")
            self.assertEqual(resp.get("date"), "2026-02-01")
            anchor_id = str(resp.get("anchorId") or "")
            self.assertTrue(anchor_id.startswith("message:"), anchor_id)
            self.assertTrue(anchor_id.endswith(":3"), anchor_id)

    def test_anchor_first_picks_global_earliest(self):
        """``kind="first"`` returns the message with the earliest create_time."""
        username = "wxid_test_user"
        ts_jan31_23 = int(datetime(2026, 1, 31, 23, 0, 0).timestamp())
        ts_feb01_10 = int(datetime(2026, 2, 1, 10, 0, 0).timestamp())

        with TemporaryDirectory() as td:
            account_dir = Path(td) / "acc"
            account_dir.mkdir(parents=True, exist_ok=True)
            _seed_message_db(
                account_dir / "message.db",
                username=username,
                rows=[
                    (ts_feb01_10, 2),  # local_id = 1
                    (ts_jan31_23, 0),  # local_id = 2, but earlier create_time -> should win even if local_id bigger
                ],
            )

            with patch.object(chat_router, "_resolve_account_dir", return_value=account_dir):
                resp = chat_router.get_chat_message_anchor(
                    username=username,
                    kind="first",
                    account="acc",
                )

            self.assertEqual(resp.get("status"), "success")
            self.assertEqual(resp.get("kind"), "first")
            anchor_id = str(resp.get("anchorId") or "")
            self.assertTrue(anchor_id.startswith("message:"), anchor_id)
            self.assertTrue(anchor_id.endswith(":2"), anchor_id)

    def test_anchor_day_empty_returns_empty_status(self):
        """A day with no messages yields status ``empty`` and an empty anchor id."""
        username = "wxid_test_user"
        ts_feb01_10 = int(datetime(2026, 2, 1, 10, 0, 0).timestamp())

        with TemporaryDirectory() as td:
            account_dir = Path(td) / "acc"
            account_dir.mkdir(parents=True, exist_ok=True)
            _seed_message_db(account_dir / "message.db", username=username, rows=[(ts_feb01_10, 0)])

            with patch.object(chat_router, "_resolve_account_dir", return_value=account_dir):
                resp = chat_router.get_chat_message_anchor(
                    username=username,
                    kind="day",
                    account="acc",
                    date="2026-02-02",
                )

            self.assertEqual(resp.get("status"), "empty")
            self.assertEqual(resp.get("anchorId"), "")

    def test_around_can_span_multiple_message_dbs_for_pagination(self):
        """``/messages/around`` returns the anchor plus a later message from another db file."""
        from fastapi import FastAPI
        from fastapi.testclient import TestClient

        username = "wxid_test_user"
        table = _msg_table_name(username)
        anchor_id = f"message:{table}:1"

        with TemporaryDirectory() as td:
            account_dir = Path(td) / "acc"
            account_dir.mkdir(parents=True, exist_ok=True)

            # Anchor in message.db, next message in message_1.db
            _seed_message_db_full(
                account_dir / "message.db",
                username=username,
                rows=[(1000, 0, "A")],  # local_id=1
            )
            _seed_message_db_full(
                account_dir / "message_1.db",
                username=username,
                rows=[(2000, 0, "B")],  # local_id=1
            )
            _seed_contact_db_minimal(account_dir / "contact.db")

            app = FastAPI()
            app.include_router(chat_router.router)
            client = TestClient(app)

            query = {
                "account": "acc",
                "username": username,
                "anchor_id": anchor_id,
                "before": 0,
                "after": 10,
            }
            with patch.object(chat_router, "_resolve_account_dir", return_value=account_dir):
                resp = client.get("/api/chat/messages/around", params=query)

            self.assertEqual(resp.status_code, 200, resp.text)
            data = resp.json()
            self.assertEqual(data.get("status"), "success")
            self.assertEqual(data.get("username"), username)
            self.assertEqual(data.get("anchorId"), anchor_id)
            self.assertEqual(data.get("anchorIndex"), 0)

            msgs = data.get("messages") or []
            self.assertEqual(len(msgs), 2)
            self.assertEqual(msgs[0].get("id"), anchor_id)
            self.assertEqual(msgs[1].get("id"), f"message_1:{table}:1")