mirror of
https://github.com/LifeArchiveProject/WeChatDataAnalysis.git
synced 2026-02-20 06:40:49 +08:00
- 导出格式新增 html:生成 index.html + 会话 messages.html,离线浏览 - 支持 chatHistory(合并消息)解析/渲染与弹窗查看 - 图片资源解析增强:MessageResourceInfo 优先 + md5/hdmd5 兜底 - HTML 导出可选下载远程缩略图(仅公网主机/图片类型/5MB 限制) - 修复拍一拍误判、公众号封面样式识别;转账过期状态与前端展示
200 lines
7.5 KiB
Python
200 lines
7.5 KiB
Python
import os
|
|
import hashlib
|
|
import sqlite3
|
|
import sys
|
|
import unittest
|
|
import zipfile
|
|
import importlib
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
|
|
|
|
# Directory two levels above this file's resolved path (``parents[1]``), i.e.
# the parent of the directory that contains this test module.
ROOT = Path(__file__).resolve().parents[1]
# Put ``<ROOT>/src`` first on the import path. Presumably this is where the
# ``wechat_decrypt_tool`` package imported by the tests in this module lives,
# so the tests run without installing it — confirm against the repo layout.
sys.path.insert(0, str(ROOT / "src"))
|
|
|
|
|
|
class TestChatExportImageMd5CandidateFallback(unittest.TestCase):
|
|
def _reload_export_modules(self):
|
|
import wechat_decrypt_tool.app_paths as app_paths
|
|
import wechat_decrypt_tool.chat_helpers as chat_helpers
|
|
import wechat_decrypt_tool.media_helpers as media_helpers
|
|
import wechat_decrypt_tool.chat_export_service as chat_export_service
|
|
|
|
importlib.reload(app_paths)
|
|
importlib.reload(chat_helpers)
|
|
importlib.reload(media_helpers)
|
|
importlib.reload(chat_export_service)
|
|
return chat_export_service
|
|
|
|
def _seed_contact_db(self, path: Path, *, account: str, username: str) -> None:
|
|
conn = sqlite3.connect(str(path))
|
|
try:
|
|
conn.execute(
|
|
"""
|
|
CREATE TABLE contact (
|
|
username TEXT,
|
|
remark TEXT,
|
|
nick_name TEXT,
|
|
alias TEXT,
|
|
local_type INTEGER,
|
|
verify_flag INTEGER,
|
|
big_head_url TEXT,
|
|
small_head_url TEXT
|
|
)
|
|
"""
|
|
)
|
|
conn.execute(
|
|
"""
|
|
CREATE TABLE stranger (
|
|
username TEXT,
|
|
remark TEXT,
|
|
nick_name TEXT,
|
|
alias TEXT,
|
|
local_type INTEGER,
|
|
verify_flag INTEGER,
|
|
big_head_url TEXT,
|
|
small_head_url TEXT
|
|
)
|
|
"""
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
(account, "", "我", "", 1, 0, "", ""),
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO contact VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
(username, "", "测试好友", "", 1, 0, "", ""),
|
|
)
|
|
conn.commit()
|
|
finally:
|
|
conn.close()
|
|
|
|
def _seed_session_db(self, path: Path, *, username: str) -> None:
|
|
conn = sqlite3.connect(str(path))
|
|
try:
|
|
conn.execute(
|
|
"""
|
|
CREATE TABLE SessionTable (
|
|
username TEXT,
|
|
is_hidden INTEGER,
|
|
sort_timestamp INTEGER
|
|
)
|
|
"""
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO SessionTable VALUES (?, ?, ?)",
|
|
(username, 0, 1735689600),
|
|
)
|
|
conn.commit()
|
|
finally:
|
|
conn.close()
|
|
|
|
def _seed_message_db(self, path: Path, *, account: str, username: str) -> None:
|
|
conn = sqlite3.connect(str(path))
|
|
try:
|
|
conn.execute("CREATE TABLE Name2Id (rowid INTEGER PRIMARY KEY, user_name TEXT)")
|
|
conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (1, account))
|
|
conn.execute("INSERT INTO Name2Id(rowid, user_name) VALUES (?, ?)", (2, username))
|
|
|
|
table_name = f"msg_{hashlib.md5(username.encode('utf-8')).hexdigest()}"
|
|
conn.execute(
|
|
f"""
|
|
CREATE TABLE {table_name} (
|
|
local_id INTEGER,
|
|
server_id INTEGER,
|
|
local_type INTEGER,
|
|
sort_seq INTEGER,
|
|
real_sender_id INTEGER,
|
|
create_time INTEGER,
|
|
message_content TEXT,
|
|
compress_content BLOB
|
|
)
|
|
"""
|
|
)
|
|
|
|
good_md5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
bad_md5 = "ffffffffffffffffffffffffffffffff"
|
|
image_xml = f'<msg><img md5="{bad_md5}" hdmd5="{good_md5}" cdnthumburl="img_file_id_1" /></msg>'
|
|
|
|
conn.execute(
|
|
f"INSERT INTO {table_name} (local_id, server_id, local_type, sort_seq, real_sender_id, create_time, message_content, compress_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
(1, 1001, 3, 1, 2, 1735689601, image_xml, None),
|
|
)
|
|
conn.commit()
|
|
finally:
|
|
conn.close()
|
|
|
|
def _seed_decrypted_resource(self, account_dir: Path) -> None:
|
|
resource_root = account_dir / "resource"
|
|
(resource_root / "aa").mkdir(parents=True, exist_ok=True)
|
|
(resource_root / "aa" / "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg").write_bytes(b"\xff\xd8\xff\xd9")
|
|
|
|
def _prepare_account(self, root: Path, *, account: str, username: str) -> Path:
|
|
account_dir = root / "output" / "databases" / account
|
|
account_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
self._seed_contact_db(account_dir / "contact.db", account=account, username=username)
|
|
self._seed_session_db(account_dir / "session.db", username=username)
|
|
self._seed_message_db(account_dir / "message_0.db", account=account, username=username)
|
|
self._seed_decrypted_resource(account_dir)
|
|
return account_dir
|
|
|
|
def _create_job(self, manager, *, account: str, username: str):
|
|
job = manager.create_job(
|
|
account=account,
|
|
scope="selected",
|
|
usernames=[username],
|
|
export_format="html",
|
|
start_time=None,
|
|
end_time=None,
|
|
include_hidden=False,
|
|
include_official=False,
|
|
include_media=True,
|
|
media_kinds=["image"],
|
|
message_types=[],
|
|
output_dir=None,
|
|
allow_process_key_extract=False,
|
|
download_remote_media=False,
|
|
privacy_mode=False,
|
|
file_name=None,
|
|
)
|
|
|
|
for _ in range(200):
|
|
latest = manager.get_job(job.export_id)
|
|
if latest and latest.status in {"done", "error", "cancelled"}:
|
|
return latest
|
|
import time as _time
|
|
|
|
_time.sleep(0.05)
|
|
self.fail("export job did not finish in time")
|
|
|
|
def test_falls_back_to_secondary_md5_candidate(self):
|
|
with TemporaryDirectory() as td:
|
|
root = Path(td)
|
|
account = "wxid_test"
|
|
username = "wxid_friend"
|
|
self._prepare_account(root, account=account, username=username)
|
|
|
|
prev_data = os.environ.get("WECHAT_TOOL_DATA_DIR")
|
|
try:
|
|
os.environ["WECHAT_TOOL_DATA_DIR"] = str(root)
|
|
svc = self._reload_export_modules()
|
|
job = self._create_job(svc.CHAT_EXPORT_MANAGER, account=account, username=username)
|
|
self.assertEqual(job.status, "done", msg=job.error)
|
|
|
|
with zipfile.ZipFile(job.zip_path, "r") as zf:
|
|
names = set(zf.namelist())
|
|
self.assertIn("media/images/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg", names)
|
|
self.assertFalse(any("ffffffffffffffffffffffffffffffff" in n for n in names if n.startswith("media/images/")))
|
|
|
|
html_path = next((n for n in names if n.endswith("/messages.html")), "")
|
|
self.assertTrue(html_path)
|
|
html_text = zf.read(html_path).decode("utf-8", errors="ignore")
|
|
self.assertIn("../../media/images/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.jpg", html_text)
|
|
finally:
|
|
if prev_data is None:
|
|
os.environ.pop("WECHAT_TOOL_DATA_DIR", None)
|
|
else:
|
|
os.environ["WECHAT_TOOL_DATA_DIR"] = prev_data
|
|
|