Files
WeChatDataAnalysis/tools/export_database_schema_json.py
2977094657 ebc68de8a8 chore(tools): 添加解密与资源调试脚本
- 增加解密/资源/表情/媒体定位等调试脚本,便于本地排查与验证
2025-12-17 16:59:49 +08:00

138 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
导出微信数据库分析结果为 JSON
- 基于 analyze_wechat_databases.WeChatDatabaseAnalyzer
- 联合 wechat_db_config.json(含 ohmywechat 常见类型与启发式)补全字段含义
- 生成汇总 JSON 与按库拆分的 JSON 文件
用法:
python tools/export_database_schema_json.py \
--databases-path output/databases \
--output-dir output/schema_json \
--config wechat_db_config.json
"""
from __future__ import annotations
import argparse
import json
from datetime import datetime
from pathlib import Path
from typing import Any, Dict
import sys
# Repository root: the directory one level above the folder that holds this script.
ROOT = Path(__file__).resolve().parents[1]

# analyze_wechat_databases.py lives in the repository root; put that directory
# at the front of sys.path (only once) so the module can be imported from here.
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))
def _write_json(path: Path, payload: Dict[str, Any]) -> None:
    """Write *payload* to *path* as indented UTF-8 JSON, keeping non-ASCII text readable."""
    with path.open("w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def _table_entry(analyzer: Any, table_name: str, table: Dict[str, Any]) -> Dict[str, Any]:
    """Build the exported record for one table, attaching a ``meaning`` to each column."""
    columns = []
    for col in table.get("columns", []):
        name = col.get("name")
        columns.append({
            "name": name,
            "type": col.get("type"),
            "notnull": col.get("notnull"),
            "default": col.get("dflt_value"),
            "pk": col.get("pk"),
            # A column without a name cannot be looked up, so it gets an empty meaning.
            "meaning": analyzer.get_field_meaning(name, table_name) if name else "",
        })
    return {
        "row_count": table.get("row_count", 0),
        "columns": columns,
        "indexes": table.get("indexes", []),
        "foreign_keys": table.get("foreign_keys", []),
        "create_sql": table.get("create_sql"),
        "sample_data": table.get("sample_data", []),
        # Markers for groups of similar tables (e.g. merged Msg_* tables).
        "is_representative": table.get("is_representative", False),
        "similar_group": table.get("similar_group", {}),
    }


def export_analysis(databases_path: Path, output_dir: Path, config_file: Path) -> int:
    """Export the analyzer's database schema results as JSON files.

    Writes one ``<db_name>.schema.json`` per analyzed database plus a combined
    ``all_databases.schema.json`` (with a ``_metadata`` header) into *output_dir*,
    creating that directory if needed.

    Args:
        databases_path: Root directory of the databases handed to the analyzer.
        config_file: Field-meaning configuration file handed to the analyzer.
        output_dir: Directory that receives the JSON files.

    Returns:
        The number of databases exported (0 when the analyzer reports none).

    Raises:
        OSError: If the output directory or any JSON file cannot be written.
    """
    # Deferred import: the analyzer module is only loaded when an export is run.
    from analyze_wechat_databases import WeChatDatabaseAnalyzer

    output_dir.mkdir(parents=True, exist_ok=True)
    analyzer = WeChatDatabaseAnalyzer(
        databases_path=str(databases_path),
        config_file=str(config_file),
    )
    # Mapping of db_name -> db_info. An empty/None result is treated as "no
    # databases" so that an empty summary file is still produced.
    results = analyzer.analyze_all_databases() or {}

    combined: Dict[str, Any] = {
        "_metadata": {
            "generated_time": datetime.now().isoformat(),
            "source": "analyze_wechat_databases.py",
            "config_used": str(config_file),
            "databases_root": str(databases_path),
            "note": "字段含义来自 wechat_db_config.json 与启发式推断(结合 ohmywechat 常见类型)",
        },
        "databases": {},
    }

    for db_name, db_info in results.items():
        db_out: Dict[str, Any] = {
            "database_name": db_info.get("database_name", db_name),
            "database_path": db_info.get("database_path"),
            "database_size": db_info.get("database_size"),
            "description": db_info.get("description"),
            "table_count": db_info.get("table_count"),
            "tables": {
                table_name: _table_entry(analyzer, table_name, table)
                for table_name, table in db_info.get("tables", {}).items()
            },
        }

        # Per-database JSON file.
        single_path = output_dir / f"{db_name}.schema.json"
        _write_json(single_path, db_out)
        combined["databases"][db_name] = db_out
        print(f"[OK] 写出数据库JSON: {single_path.name}")

    # Combined summary file covering every database.
    count_dbs = len(combined["databases"])
    combined_path = output_dir / "all_databases.schema.json"
    _write_json(combined_path, combined)
    # The closing parenthesis was missing from this message in the original.
    print(f"[OK] 汇总JSON: {combined_path} (数据库数: {count_dbs})")
    return count_dbs
def main(argv: "list[str] | None" = None) -> None:
    """Command-line entry point: parse options and run the schema export.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of a plain ``main()``.

    Raises:
        FileNotFoundError: If the configuration file given by ``--config``
            does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--databases-path",
        default=str(ROOT / "output" / "databases"),
        help="解密后的数据库根目录(按账号分目录)",
    )
    parser.add_argument(
        "--output-dir",
        default=str(ROOT / "output" / "schema_json"),
        help="JSON 输出目录",
    )
    parser.add_argument(
        "--config",
        default=str(ROOT / "wechat_db_config.json"),
        # The opening parenthesis was missing from this help text in the original.
        help="字段含义配置 JSON(由 tools/generate_wechat_db_config.py 生成)",
    )
    args = parser.parse_args(argv)

    db_root = Path(args.databases_path)
    out_dir = Path(args.output_dir)
    cfg = Path(args.config)

    # The configuration is mandatory: fail early, before anything is written.
    if not cfg.exists():
        raise FileNotFoundError(f"未找到配置文件: {cfg},请先运行 tools/generate_wechat_db_config.py")
    # A missing database directory only triggers a warning; the export still runs.
    if not db_root.exists():
        print(f"[WARN] 数据库目录不存在: {db_root},仍将生成空汇总文件。")

    count = export_analysis(db_root, out_dir, cfg)
    if count == 0:
        print("[INFO] 未检测到可分析数据库(可先运行解密流程或确认路径)")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()