Files
WeChatDataAnalysis/tools/generate_wechat_db_config.py
2977094657 35a2266b1c improvement(tools): 增强配置模板与字段含义生成
- generate_config_template: 增加 CLI 参数;FTS/PRAGMA 失败时从建表 SQL 兜底解析列\n- generate_wechat_db_config: 扩充库描述/字段含义词典,并支持从 tools/ 目录运行\n- 新增 export_database_schema_markdown:基于 wechat_db_config.json 导出 Markdown 文档
2026-02-15 14:34:15 +08:00

940 lines
42 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate wechat_db_config.json:
- read wechat_db_config_template.json
- combine the heuristics of this project's analyze_wechat_databases with the common fields / message types taken from ohmywechat
- fill in a Chinese meaning for every table field in bulk and write out wechat_db_config.json
"""
from __future__ import annotations
import json
import re
from pathlib import Path
from datetime import datetime
import sys
# Project root: two levels up from this file (the script itself lives in tools/).
ROOT = Path(__file__).resolve().parents[1]

# Template that is read, and the two places the generated config is written to.
TEMPLATE_PATH = ROOT.joinpath("wechat_db_config_template.json")
OUTPUT_MAIN = ROOT.joinpath("wechat_db_config.json")
OUTPUT_DIR = ROOT.joinpath("output", "configs")
OUTPUT_COPY = OUTPUT_DIR.joinpath("wechat_db_config.generated.json")

# Keep root-level modules importable even when the script is started from tools/.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Try to import the analyzer so its heuristics can be reused; when the import
# fails for any reason the generator carries on with AnalyzerCls set to None.
try:
    from analyze_wechat_databases import WeChatDatabaseAnalyzer  # type: ignore
except Exception:
    AnalyzerCls = None
else:
    AnalyzerCls = WeChatDatabaseAnalyzer
def build_db_descriptions() -> dict[str, str]:
    """Return the built-in database-name -> description mapping.

    Keys are database names as they are looked up by ``fill_config``
    (e.g. ``message``, ``media_0``); values are the Chinese description
    texts written into the generated config. A fresh dict is built on
    every call.
    """
    # NOTE: message_{n}.db shards are not listed here; fill_config handles
    # them separately with a regular expression.
    known_databases = (
        ("message", "聊天记录核心数据库"),
        ("message_fts", "聊天消息全文索引数据库FTS"),
        ("message_resource", "消息资源索引数据库(图片/文件/视频等)"),
        ("contact", "联系人数据库(好友/群/公众号基础信息)"),
        ("session", "会话数据库(会话列表与未读统计)"),
        ("sns", "朋友圈数据库(动态与互动)"),
        ("favorite", "收藏数据库"),
        ("favorite_fts", "收藏全文索引数据库FTS"),
        ("emoticon", "表情包数据库"),
        ("head_image", "头像数据数据库"),
        ("hardlink", "硬链接索引数据库(资源去重/快速定位)"),
        ("media_0", "媒体数据数据库含语音SILK等"),
        ("unspportmsg", "不支持消息数据库(客户端不支持的消息类型)"),
        ("general", "通用/系统数据库(新消息通知/支付等)"),
        ("contact_fts", "联系人全文索引数据库FTS"),
        ("chat_search_index", "本项目生成聊天记录全文检索索引库FTS5用于搜索"),
        ("bizchat", "公众号/企业微信相关数据库(会话/联系人等)"),
        ("digital_twin", "(本项目生成)数字分身数据库(派生数据,非微信原始库)"),
    )
    return dict(known_databases)
def build_message_types_from_ohmywechat() -> dict[str, str]:
    """Return common PC / official-account message types keyed by "Type,SubType".

    The table is compiled with reference to ohmywechat and similar material.
    Every key is the decimal ``local_type`` followed by ``,0`` (sub type is
    set to 0 when unknown); values are the Chinese labels for that type.
    """
    labelled_types = [
        (1, "文本消息"),
        (3, "图片消息"),
        (34, "语音消息"),
        (42, "名片消息"),
        (43, "视频消息"),
        (47, "动画表情"),
        (48, "位置消息"),
        (244813135921, "引用消息"),
        (17179869233, "卡片式链接(带描述)"),
        (21474836529, "卡片式链接/图文消息公众号mmreader XML"),
        (154618822705, "小程序分享"),
        (12884901937, "音乐卡片"),
        (8594229559345, "红包卡片"),
        (81604378673, "聊天记录合并转发消息"),
        (266287972401, "拍一拍消息"),
        (8589934592049, "转账卡片"),
        (270582939697, "视频号直播卡片"),
        (25769803825, "文件消息"),
        (10000, "系统消息(撤回/入群提示等)"),
    ]
    return {f"{local_type},0": label for local_type, label in labelled_types}
KNOWN_FIELD_MEANINGS = {
# 通用主键/标识
"id": "标识符字段(主键/索引)",
"local_id": "本地自增ID主键/定位用)",
"server_id": "服务器消息ID唯一且全局递增",
"svr_id": "服务器消息ID同server_id",
"message_id": "消息ID表内主键或消息级索引",
"resource_id": "资源ID资源明细主键",
"history_id": "历史消息ID系统消息/历史消息关联键)",
# 会话/用户/群聊
"username": "用户名/会话标识wxid_xxx 或 xxx@chatroom",
"user_name": "用户名/会话标识wxid_xxx 或 xxx@chatroom",
"sender_id": "发送者内部ID与Name2Id映射",
"real_sender_id": "真实发送者ID群聊内消息具体成员",
"chat_id": "会话内部ID与ChatName2Id映射",
"chat_name_id": "会话内部ID与ChatName2Id映射",
"session_id": "会话IDFTS/资源维度的会话映射)",
"session_name": "会话名username 文本值)",
"session_name_id": "会话内部IDusername 的数值映射)",
"talker_id": "会话/房间IDName2Id 对照)",
# 消息结构/状态
"local_type": "本地消息类型local_type",
"type": "类型标识(上下文相关:消息/表情/配置)",
"sub_type": "子类型标识(同一主类型细分)",
"status": "状态标志位(发送/接收/已读/撤回等)",
"upload_status": "上传状态(媒体/资源上行状态)",
"download_status": "下载状态(媒体/资源下行状态)",
"server_seq": "服务器序列号(消息顺序校验)",
"origin_source": "消息来源标识(客户端/转发/系统)",
"source": "来源附加信息XML/JSON 等)",
"msg_status": "消息状态(扩展)",
# 消息内容
"message_content": "消息内容部分类型为zstd压缩的XMLmmreader",
"compress_content": "压缩内容多见zstd可能存放富文本XML",
"packed_info_data": "打包扩展信息(二进制,消息元数据)",
"packed_info": "打包扩展信息(二进制/文本混合)",
"data_index": "数据分片/索引(媒体片段定位)",
# 时间
"create_time": "创建时间Unix时间戳",
"last_update_time": "最后更新时间Unix时间戳",
"last_modified_time": "最后修改时间Unix时间戳",
"update_time": "更新时间Unix时间戳",
"invalid_time": "失效时间Unix时间戳",
"access_time": "访问时间Unix时间戳",
"last_timestamp": "最后消息时间(会话)",
"sort_timestamp": "排序时间(会话排序)",
"timestamp": "时间戳Unix时间戳",
# 排序/去重
"sort_seq": "排序序列(单会话内消息排序/去重)",
"server_seq_": "服务器序列号(扩展)",
# 联系人/群聊
"alias": "别名(用户自定义标识)",
"encrypt_username": "加密用户名",
"flag": "标志位(多用途:联系人/公众号/配置)",
"delete_flag": "删除标志(软删除)",
"verify_flag": "认证标志(公众号/企业认证等)",
"remark": "备注名",
"remark_quan_pin": "备注名全拼",
"remark_pin_yin_initial": "备注名拼音首字母",
"nick_name": "昵称",
"pin_yin_initial": "昵称拼音首字母",
"quan_pin": "昵称全拼",
"description": "描述/个性签名/备注",
"extra_buffer": "扩展缓冲区(二进制/序列化)",
"ext_buffer": "扩展缓冲区(二进制/序列化)",
"ext_buffer_": "扩展缓冲区(二进制/序列化)",
"chat_room_type": "群类型标志",
"owner": "群主 username",
# 头像/媒体
"big_head_url": "头像大图URL",
"small_head_url": "头像小图URL",
"head_img_md5": "头像MD5",
"image_buffer": "头像二进制数据",
"voice_data": "语音二进制数据多为SILK",
# FTS / 内部表
"acontent": "FTS检索内容分词后文本",
"block": "FTS内部块数据二进制",
"segid": "FTS分段ID",
"term": "FTS分词条目",
"pgno": "FTS页号",
"c0": "FTS列c0内部结构",
"c1": "FTS列c1内部结构",
"c2": "FTS列c2内部结构",
"c3": "FTS列c3内部结构",
"c4": "FTS列c4内部结构",
"c5": "FTS列c5内部结构",
"c6": "FTS列c6内部结构",
"c7": "FTS列c7内部结构",
"c8": "FTS列c8内部结构",
"c9": "FTS列c9内部结构",
"c10": "FTS列c10内部结构",
"c11": "FTS列c11内部结构",
"c12": "FTS列c12内部结构",
"sz": "FTS文档大小信息",
"_rowid_": "SQLite内部行ID",
# 资源/硬链接
"md5": "资源MD5",
"md5_hash": "MD5哈希整数映射快速索引",
"file_name": "文件名(相对/逻辑名)",
"file_size": "文件大小(字节)",
"dir1": "资源路径一级目录编号(分桶)",
"dir2": "资源路径二级目录编号(分桶)",
"modify_time": "文件修改时间戳",
# 会话统计
"unread_count": "未读计数",
"unread_first_msg_srv_id": "会话未读区间首个消息SvrID",
"is_hidden": "会话隐藏标志",
"summary": "会话摘要(最近消息摘要)",
"draft": "草稿内容",
"status_": "状态/标志(上下文)",
"last_clear_unread_timestamp": "上次清空未读时间",
"last_msg_locald_id": "最后一条消息的本地ID拼写原样保留",
"last_msg_type": "最后一条消息类型",
"last_msg_sub_type": "最后一条消息子类型",
"last_msg_sender": "最后一条消息发送者username",
"last_sender_display_name": "最后一条消息发送者显示名",
"last_msg_ext_type": "最后一条消息扩展类型",
# 常见“Key-Value”配置表多库复用
"key": "Key-Value配置表",
"valueint64": "整数值int64",
"valuedouble": "浮点值double",
"valuestdstr": "字符串值std::string",
"valueblob": "二进制值blob",
"k": "配置键k",
"v": "配置值v",
# 常见保留字段
"reserved0": "保留字段reserved0",
"reserved1": "保留字段reserved1",
"reserved2": "保留字段reserved2",
"reserved3": "保留字段reserved3",
# 版本/位标志
"version": "版本号(记录/结构版本,具体含义依表而定)",
"bit_flag": "位标志/开关bit flags",
# 本项目索引/缓存库常见字段
"render_type": "渲染类型本项目定义text/image/system/...",
"db_stem": "来源数据库分片名(如 message_0",
"table_name": "来源表名(如 Msg_xxx",
"sender_username": "发送者username解码后",
"preview": "会话预览文本(用于会话列表展示)",
"built_at": "构建时间Unix时间戳",
"tablename": "表名tableName",
"value": "value",
"brand_user_name": "品牌/公众号usernamebrand_user_name",
# 常见业务字段(命名自解释)
"ticket": "票据/验证ticketticket",
"delete_table_name": "删除记录关联的消息表名delete_table_name",
"res_path": "资源路径res_path",
"biz_username": "公众号usernamebiz_username",
"search_key": "搜索键/索引字段search_key",
"click_type": "点击/热词类型click_type",
"a_group_remark": "群备注FTS检索字段a_group_remark",
"op_code": "操作码op_code",
"query": "查询关键词query",
"score": "评分/权重score",
"keyword": "关键词keyword",
"pay_load_": "payload/扩展数据pay_load_",
"bill_no": "账单号bill_no",
"session_title": "会话标题session_title",
"unread_stat": "未读统计字段unread_stat",
"ui_type": "UI类型/发布类型ui_type",
"error_type": "错误类型error_type",
"tips_content": "提示内容tips_content",
"record_content": "记录内容record_content",
"business_type": "业务类型business_type",
"access_content_key": "访问内容keyaccess_content_key",
"access_content_type": "访问内容类型access_content_type",
"range_type": "范围类型range_type",
"message_local_type": "消息类型message_local_type",
"message_origin_source": "消息来源标识message_origin_source",
# 朋友圈sns常见拆分字段
"tid_heigh_bit": "tid 高位拆分字段heigh_bit字段名原样保留",
"tid_low_bit": "tid 低位拆分字段low_bit",
"break_flag": "断点/分页标志0/1用于分页/增量拉取水位)",
# WCDB 压缩控制
"WCDB_CT_message_content": "WCDB压缩标记message_content列",
"WCDB_CT_source": "WCDB压缩标记source列",
}
# Per-table field-meaning overrides (take precedence over KNOWN_FIELD_MEANINGS).
# key: table_name.lower(); value: { field_name.lower(): meaning }
KNOWN_FIELD_MEANINGS_BY_TABLE: dict[str, dict[str, str]] = {
# contact.db
"contact": {
"id": "序号(通常与 name2id.rowid 对应)",
"username": "联系人的 wxid / 群聊 username可唯一确定联系人",
"local_type": "联系人类型1=通讯录好友/公众号/已添加群聊2=未添加到通讯录的群聊3=群中的陌生人5=企业微信好友6=群聊中的陌生企业微信好友",
"alias": "微信号(微信里显示的微信号)",
"flag": "联系人标志位需转二进制常见第7位星标第12位置顶第17位屏蔽朋友圈第24位仅聊天",
"head_img_md5": "头像md5可通过 head_image.db 查询对应头像)",
"verify_flag": "认证标志(公众号/企业等非0常表示公众号",
"description": "描述字段(样本为空;用途待确认)",
"extra_buffer": "好友扩展信息protobuf包含性别/地区/签名等,本项目解析 gender/signature/country/province/city/source_scene",
"chat_room_notify": "群消息通知相关设置样本为0/1疑似免打扰/通知开关,待确认)",
"is_in_chat_room": "群聊状态标记样本为1/2具体含义待确认",
"chat_room_type": "群聊类型/标志样本为0/2具体含义待确认",
},
"stranger": {
"id": "序号(通常与 name2id.rowid 对应)",
"username": "联系人的 wxid / 群聊 username",
"local_type": "联系人类型1=通讯录好友/公众号/已添加群聊2=未添加到通讯录的群聊3=群中的陌生人5=企业微信好友6=群聊中的陌生企业微信好友",
"alias": "微信号(微信里显示的微信号)",
"flag": "联系人标志位需转二进制常见第7位星标第12位置顶第17位屏蔽朋友圈第24位仅聊天",
"head_img_md5": "头像md5可通过 head_image.db 查询对应头像)",
"verify_flag": "认证标志(公众号/企业等非0常表示公众号",
"description": "描述字段(样本为空;用途待确认)",
"extra_buffer": "好友扩展信息protobuf包含性别/地区/签名等,本项目解析 gender/signature/country/province/city/source_scene",
"chat_room_notify": "群消息通知相关设置样本为0/1疑似免打扰/通知开关,待确认)",
"is_in_chat_room": "群聊状态标记样本为1/2具体含义待确认",
"chat_room_type": "群聊类型/标志样本为0/2具体含义待确认",
},
"biz_info": {
"id": "序号(与 name2id.rowid 对应,可唯一确定一个公众号)",
"username": "公众号username原始 wxid/gh_xxx",
"type": "公众号类型1=公众号0=订阅号(资料来源:万字长文)",
"accept_type": "接收类型accept_type含义待确认样本常为0",
"child_type": "子类型child_type含义待确认样本常为0",
"version": "版本号含义待确认样本常为0",
"external_info": "公众号详细信息(常见 JSON含底部菜单/交互配置等)",
"brand_info": "公众号品牌/菜单信息(常见 JSONurls 等)",
"brand_list": "品牌列表/关联列表(格式待确认,可能为 JSON",
"brand_flag": "品牌/能力标志位(含义待确认)",
"belong": "归属字段(含义待确认)",
"home_url": "主页链接(含义待确认)",
},
"chat_room": {
"id": "序号(与 name2id.rowid 对应)",
"username": "群聊的usernamexxx@chatroom",
"owner": "群主username",
"ext_buffer": "群成员username与群昵称protobufChatRoomData.members 等)",
},
"chat_room_info_detail": {
"room_id_": "序号(与 name2id.rowid 对应)",
"username_": "群聊的usernamexxx@chatroom",
"announcement_": "群公告(文本)",
"announcement_editor_": "群公告编辑者username",
"announcement_publish_time_": "群公告发布时间(时间戳)",
"chat_room_status_": "群状态/标志位bitmask样本常见 0x80000 等,具体位含义待确认)",
"xml_announcement_": "群公告XML可解析更多信息图片/文件等)",
"ext_buffer_": "扩展信息protobuf-like样本长度较小具体结构待确认",
},
"chatroom_member": {
"room_id": "群聊ID对应 name2id.rowid",
"member_id": "群成员ID对应 name2id.rowid",
},
"contact_label": {
"label_id_": "标签ID",
"label_name_": "标签名称",
"sort_order_": "排序",
},
# message_*.db / biz_message_*.db
"msg_*": {
"local_id": "自增id本地",
"server_id": "服务端id每条消息唯一",
"local_type": "消息类型local_type低32位=type高32位=sub_type可用 (local_type & 0xFFFFFFFF) 与 (local_type >> 32) 拆分)",
"sort_seq": "排序字段单会话内消息排序样本≈create_time*1000",
"real_sender_id": "发送者id可通过 Name2Id.rowid 映射到 username",
"create_time": "秒级时间戳",
"server_seq": "服务端接收顺序idserver_seq",
"message_content": "消息内容local_type=1 时为文本,其它类型多为 Zstandard 压缩后的XML/二进制",
"compress_content": "压缩后的内容(多见 Zstandard",
"packed_info_data": "protobuf扩展信息图片文件名/语音转文字/合并转发文件夹名等)",
},
"name2id": {
"is_session": "是否会话名标记1=会话/聊天对象0=其它映射如群成员ID",
},
# session.db
"sessiontable": {
"type": "会话类型样本为0枚举待确认",
"status": "会话状态样本为0枚举待确认",
"unread_first_pat_msg_local_id": "未读拍一拍消息的本地ID样本为0含义待确认",
"unread_first_pat_msg_sort_seq": "未读拍一拍消息的排序序号样本为0含义待确认",
},
"session_last_message": {
"username": "会话username",
"sort_seq": "最后一条消息sort_seq",
"local_id": "最后一条消息local_id",
"create_time": "最后一条消息create_time秒级时间戳",
"local_type": "最后一条消息local_type",
"sender_username": "最后一条消息发送者username",
"preview": "最后一条消息预览文本(用于会话列表)",
"db_stem": "来源消息库分片名(如 message_0",
"table_name": "来源消息表名(如 Msg_xxx",
"built_at": "构建时间Unix时间戳",
},
# 本项目 chat_search_index.db
"message_fts": {
"text": "可检索文本(索引内容)",
"render_type": "渲染类型text/system/image/voice/video/emoji/...,本项目定义)",
"db_stem": "来源消息库分片名(如 message_0",
"table_name": "来源消息表名(如 Msg_xxx",
"sender_username": "发送者username解码后",
},
# emoticon.db
"knonstoreemoticontable": {
"type": "表情类型样本均为3枚举含义待确认",
"caption": "表情说明/标题caption",
"product_id": "表情包/产品IDproduct_id",
"aes_key": "AES密钥用于CDN下载解密",
"auth_key": "鉴权keyCDN下载",
"extern_md5": "外部资源md5extern_md5",
},
"kstoreemoticonpackagetable": {
"package_id_": "表情包IDpackage_id",
"package_name_": "表情包名称",
"payment_status_": "支付状态payment_status",
"download_status_": "下载状态download_status",
"install_time_": "安装时间(时间戳)",
"remove_time_": "移除时间(时间戳)",
"sort_order_": "排序",
"introduction_": "简介introduction",
"full_description_": "完整描述full_description",
"copyright_": "版权信息",
"author_": "作者信息",
"store_icon_url_": "商店图标URL",
"panel_url_": "面板/详情页URL",
},
"kstoreemoticonfilestable": {
"package_id_": "表情包IDpackage_id",
"md5_": "表情md5",
"type_": "表情类型type",
"sort_order_": "排序",
"emoticon_size_": "表情文件大小(字节)",
"emoticon_offset_": "表情文件偏移(用于包内定位)",
"thumb_size_": "缩略图大小(字节)",
"thumb_offset_": "缩略图偏移(用于包内定位)",
},
# favorite.db
"fav_db_item": {
"version": "版本号(收藏条目结构/内容版本样本为87",
"fromusr": "来源用户username收藏来源",
"realchatname": "来源群聊username若收藏来源于群聊",
"upload_error_code": "上传错误码",
"trans_res_error_code": "资源转换错误码trans_res_error_code",
},
# general.db
"ilink_voip": {
"wx_chatroom_": "群聊usernamexxx@chatroom",
"millsecond_": "毫秒时间戳/时间标记(字段名推断)",
"group_id_": "ILink group_id字段名推断",
"room_id_": "房间ID字段名推断",
"room_key_": "房间key字段名推断",
"route_id_": "路由ID字段名推断",
"voice_status_": "通话状态(字段名推断)",
"talker_create_user_": "发起者username字段名推断",
"not_friend_user_list_": "非好友成员列表(字段名推断)",
"members_": "成员列表(字段名推断)",
"is_ilink_": "是否ilink通话字段名推断",
"ever_quit_chatroom_": "是否曾退出群聊(字段名推断)",
},
"fmessagetable": {
"user_name_": "用户名(好友验证/陌生人会话用户名)",
"type_": "消息类型(好友验证/系统消息样本为37",
"timestamp_": "时间戳",
"encrypt_user_name_": "加密用户名",
"content_": "内容(验证消息/系统提示等)",
"is_sender_": "是否发送方is_sender",
"ticket_": "票据/验证ticket",
"scene_": "来源场景码scene",
"fmessage_detail_buf_": "详细信息protobuf-like包含验证文案/来源等信息)",
},
"handoff_remind_v0": {
"item_id": "条目IDitem_id",
"head_icon": "图标URL/资源标识)",
"title": "标题",
"desc_type": "描述类型desc_type",
"create_time": "创建时间(时间戳)",
"start_time": "开始时间(时间戳)",
"expire_time": "过期时间(时间戳)",
"biz_type": "业务类型biz_type",
"version": "版本号version",
"url": "跳转URL",
"extra_info": "扩展信息extra_info",
},
"transfertable": {
"transfer_id": "转账IDtransfer_id",
"transcation_id": "交易IDtransaction_id原字段拼写保留",
"message_server_id": "关联消息server_id",
"second_message_server_id": "关联第二条转账消息server_id可在 message_*.db::Msg_* 表的 server_id 对应到)",
"session_name": "会话username",
"pay_sub_type": "支付子类型pay_sub_type",
"pay_receiver": "收款方username",
"pay_payer": "付款方username",
"begin_transfer_time": "转账开始时间(时间戳)",
"last_modified_time": "最后修改时间(时间戳)",
"invalid_time": "失效时间(时间戳)",
"last_update_time": "最后更新时间(时间戳)",
"delay_confirm_flag": "延迟确认标志delay_confirm_flag",
"bubble_clicked_flag": "气泡点击标志bubble_clicked_flag",
},
# bizchat.db
"chat_group": {
"brand_user_name": "品牌/公众号usernamebrand_user_name",
"bit_flag": "位标志/开关bit_flag",
"chat_name": "群组名称chat_name",
"user_list": "成员列表(常见为 ; 分隔的 user_id/username 列表;待确认)",
"reserved0": "保留字段reserved0",
"reserved1": "保留字段reserved1",
"reserved2": "保留字段reserved2",
"reserved3": "保留字段reserved3",
},
"user_info": {
"brand_user_name": "品牌/公众号usernamebrand_user_name",
"bit_flag": "位标志/开关bit_flag",
"reserved0": "保留字段reserved0",
"reserved1": "保留字段reserved1",
"reserved2": "保留字段reserved2",
"reserved3": "保留字段reserved3",
},
# sns.db
"snsmessage_tmp3": {
"from_username": "来源用户username评论/点赞发起者)",
"from_nickname": "来源用户昵称(评论/点赞发起者)",
"to_username": "目标用户username被回复/被@的人)",
"to_nickname": "目标用户昵称(被回复/被@的人)",
"comment_flag": "评论标志位样本为0具体 bit 含义待确认)",
},
"snsadtimeline": {
"ad_content": "广告内容ad_content格式待确认",
"remind_source_info": "提醒来源信息remind_source_info格式待确认",
"remind_self_info": "提醒自身信息remind_self_info格式待确认",
"extra_data": "扩展数据extra_data格式待确认",
},
# unspportmsg.db
"unsupportmessage": {
"from_user": "发送者username",
"to_user": "接收者username",
"msg_source": "消息来源附加信息msg_source",
},
# contact.db
"openim_wording": {
"wording": "文案/提示语wording",
"pinyin": "拼音pinyin",
},
# message_*.db / biz_message_*.db (WCDB)
"wcdb_builtin_compression_record": {
"tablename": "表名tableName",
"columns": "被WCDB压缩的列列表columns",
},
# general.db
"revokemessage": {
"to_user_name": "会话username撤回消息所在会话",
"message_type": "消息类型local_type",
"at_user_list": "@用户列表(字段名推断)",
},
"wcfinderlivestatus": {
"finder_username": "视频号作者usernamefinder_username",
"charge_flag": "是否付费/收费标志charge_flag",
},
"new_tips": {
"disable": "禁用标志disable",
"new_tips_content": "提示内容new_tips_content",
},
"redenvelopetable": {
"sender_user_name": "红包发送者username",
"hb_type": "红包类型hb_type",
},
"wacontact": {
"external_info": "外部信息JSON常见包含 BindWxaInfo/RegisterSource/WxaAppDynamic 等)",
"contact_pack_data": "联系人打包数据protobuf-like常含昵称/品牌名等)",
"wx_app_opt": "小程序/应用选项wx_app_opt位标志/开关样本为0",
},
# emoticon.db
"kstoreemoticoncaptionstable": {
"package_id_": "表情包IDpackage_id",
"md5_": "表情md5",
"language_": "语言language",
"caption_": "文案/标题caption",
},
}
KNOWN_TABLE_DESCRIPTIONS: dict[str, str] = {
# contact.db
"biz_info": "公众号信息表(公众号类型/菜单/品牌信息等)",
"chat_room": "群聊基础信息表(群主/成员列表等扩展在 ext_buffer",
"chat_room_info_detail": "群聊详细信息表(群公告/群状态等)",
"chatroom_member": "群聊成员映射表room_id ↔ member_id",
"contact": "联系人核心表(好友/群/公众号等基础信息)",
"contact_label": "联系人标签表标签ID与名称",
"name2id": "用户名wxid/群id@chatroom 等到内部数值ID映射表",
"encrypt_name2id": "加密用户名到内部数值ID映射表",
"stranger": "陌生人/临时会话信息表",
"ticket_info": "票据/会话票据信息表(用途待进一步确认)",
"stranger_ticket_info": "陌生人票据信息表(用途待进一步确认)",
"oplog": "操作/同步日志表(增量同步相关)",
"openim_appid": "OpenIM 应用ID表企业微信/互通相关)",
"openim_acct_type": "OpenIM 账号类型表",
"openim_wording": "OpenIM 文案/提示语表",
# session.db
"sessiontable": "会话列表表(会话展示/未读/置顶/隐藏等)",
"sessiondeletetable": "会话删除记录表",
"sessionunreadlisttable_1": "未读会话列表表(分表)",
"sessionunreadstattable_1": "未读统计表(分表)",
"sessionnocontactinfotable": "会话表(无联系人信息的会话)",
"session_last_message": "会话最后一条消息缓存/索引表(版本/实现差异)",
# message_*.db / biz_message_*.db
"timestamp": "时间戳/增量同步辅助表",
"deleteinfo": "删除消息记录表(删除/撤回相关)",
"deleteresinfo": "删除资源记录表(资源删除相关)",
"sendinfo": "发送相关信息表(发送状态/队列等)",
"historysysmsginfo": "历史系统消息表",
"historyaddmsginfo": "历史新增消息表",
# message_resource.db
"chatname2id": "会话名 → 会话ID 映射表(资源库维度)",
"sendername2id": "发送者名 → 发送者ID 映射表(资源库维度)",
"messageresourceinfo": "消息资源索引表(按消息/会话定位资源)",
"messageresourcedetail": "消息资源明细表md5/路径/大小等)",
"ftsrange": "FTS 范围信息表(搜索/索引辅助)",
"ftsdeleteinfo": "FTS 删除记录表(索引维护)",
# media_0.db
"voiceinfo": "语音数据表voice_data 等)",
# hardlink.db
"db_info": "WCDB Key-Value 元信息表FTS构建状态/版本/扫描时间等)",
"dir2id": "目录 → ID 映射表(硬链接索引)",
"image_hardlink_info_v4": "图片硬链接索引表v4",
"file_hardlink_info_v4": "文件硬链接索引表v4",
"video_hardlink_info_v4": "视频硬链接索引表v4",
"file_checkpoint_v4": "文件索引检查点(增量)",
"video_checkpoint_v4": "视频索引检查点(增量)",
"talker_checkpoint_v4": "会话索引检查点(增量)",
# *_fts.db / message_fts.db
"table_info": "WCDB Key-Value 元信息表(索引范围/水位/时间戳等)",
# head_image.db
"head_image": "头像缓存表(头像 md5/二进制缩略图等)",
# favorite.db
"buff": "WCDB Key-Value 缓冲/配置表(收藏等模块的缓存)",
"fav_db_item": "收藏条目表",
"fav_tag_db_item": "收藏标签表",
"fav_bind_tag_db_item": "收藏条目与标签绑定表",
# emoticon.db
"kcustomemoticonordertable": "自定义表情排序表md5 列表)",
"kexpressrecentuseeemoticontable": "最近使用表情记录Key-Value",
"knonstoreemoticontable": "非商店表情表(用户收藏/外部表情资源含CDN下载信息",
"kstoreemoticonpackagetable": "商店表情包信息表package 元数据)",
"kstoreemoticoncaptionstable": "商店表情文案表(多语言 caption",
# unspportmsg.db
"unsupportmessage": "不支持消息表PC端无法直接展示的消息类型",
# bizchat.db
"chat_group": "BizChat 群组表(企业微信/公众号群组信息)",
"user_info": "BizChat 用户表(企业微信/公众号用户信息)",
"my_user_info": "BizChat 当前账号映射表brand_user_name ↔ user_id",
# general.db
"forwardrecent": "最近转发会话记录表username/时间)",
"transfertable": "转账记录表转账ID/关联消息/状态等)",
"redenvelopetable": "红包记录表(关联消息/状态等)",
"ilink_voip": "iLink/群通话相关表房间ID/成员/状态等)",
"fmessagetable": "好友验证/陌生人消息表FMessage",
"handoff_remind_v0": "跨设备接力/提醒项表handoff_remind_v0",
"biz_pay_status": "公众号文章付费状态表url_id/is_paid 等)",
"biz_subscribe_status": "公众号订阅模板状态表template_id/is_subscribe",
"new_tips": "新提示/新功能提示表",
"reddot": "小红点提示表",
"reddot_record": "小红点记录表",
"wcfinderlivestatus": "视频号直播状态表",
"teenager_apply_access_agree_info": "青少年模式访问同意记录表",
# chat_search_index.db本项目生成
"meta": "索引元数据表schema_version/构建时间等)",
"message_fts": "全文索引表fts5用于搜索",
}
def simple_heuristic(field_name: str, table_name: str) -> str:
    """Last-resort guess of a field's meaning so that no field stays blank.

    Both names are compared case-insensitively. Generic name-based rules are
    tried in a fixed order and the first match wins; two extra rules apply
    only to tables whose name starts with ``msg_``. When nothing matches the
    generic "unknown" label is returned.
    """
    name = field_name.lower()
    table = table_name.lower()

    # Ordered (predicate, label) pairs - order matters, the first hit is returned.
    generic_rules = (
        (lambda s: s.endswith("id") or s in ("_rowid_", "rowid"), "标识符字段"),
        (lambda s: "time" in s, "时间戳字段"),
        (lambda s: s in ("name", "user_name", "username"), "用户名/会话名"),
        (lambda s: s in ("content", "message_content", "compress_content"), "内容/正文字段"),
        (lambda s: "md5" in s, "MD5哈希字段"),
        (lambda s: "status" in s, "状态位/状态码"),
        (lambda s: s.startswith("is_"), "布尔标志字段"),
        (lambda s: s.startswith("wcdb_ct_"), "WCDB压缩控制字段"),
        (lambda s: "buf" in s or "blob" in s, "二进制缓冲数据"),
        (lambda s: "url" in s, "URL链接"),
        (lambda s: "size" in s or "count" in s, "数量/大小字段"),
        (lambda s: "seq" in s, "序列号/排序字段"),
    )
    for matches, label in generic_rules:
        if matches(name):
            return label

    # Columns that are only recognised inside Msg_* tables.
    if table.startswith("msg_"):
        msg_columns = {
            "source": "消息来源附加信息XML/JSON",
            "local_type": "本地消息类型local_type",
        }
        if name in msg_columns:
            return msg_columns[name]

    return "未知用途字段"
def compute_field_meaning(analyzer, table_name: str, field_name: str) -> str:
    """Resolve the meaning text for one field of one table.

    Lookup order (the first hit wins):

    1. Per-table overrides in ``KNOWN_FIELD_MEANINGS_BY_TABLE``: the exact
       lower-cased table name first, then every key containing ``*``
       interpreted as a glob pattern, so that an entry such as ``msg_*``
       also applies to concrete tables named ``Msg_<suffix>``.
    2. ``KNOWN_FIELD_MEANINGS`` by the field name as given, then lower-cased.
    3. A fixed hint for ``message_content`` / ``compress_content``.
    4. ``analyzer.get_field_meaning(field_name, table_name)`` when an
       analyzer is supplied and it returns a non-empty string.
    5. ``simple_heuristic`` as the final fallback.

    Args:
        analyzer: Object exposing ``get_field_meaning(field, table)``, or None.
        table_name: Table the field belongs to.
        field_name: Column name.

    Returns:
        The meaning text for the field.
    """
    # Function-scope import: only the glob fallback below needs it.
    from fnmatch import fnmatchcase

    lt = table_name.lower()
    lf = field_name.lower()

    # 1) Table-level overrides. An exact key is consulted before glob keys so
    #    existing exact entries keep their priority.
    table_maps = []
    exact_map = KNOWN_FIELD_MEANINGS_BY_TABLE.get(lt)
    if exact_map:
        table_maps.append(exact_map)
    table_maps.extend(
        fields
        for pattern, fields in KNOWN_FIELD_MEANINGS_BY_TABLE.items()
        if "*" in pattern and pattern != lt and fnmatchcase(lt, pattern)
    )
    for fields in table_maps:
        if lf in fields:
            return fields[lf]

    # 2) Global exact mapping (original spelling first, e.g. WCDB_CT_* keys).
    if field_name in KNOWN_FIELD_MEANINGS:
        return KNOWN_FIELD_MEANINGS[field_name]
    if lf in KNOWN_FIELD_MEANINGS:
        return KNOWN_FIELD_MEANINGS[lf]

    # 3) Extra mmreader/zstd hint. Both names are currently also keys of
    #    KNOWN_FIELD_MEANINGS, so this only triggers if they are removed there.
    if lf in {"message_content", "compress_content"}:
        return "消息内容部分类型为zstd压缩XMLmmreader"

    # 4) Borrow the project analyzer's heuristic. Failures are deliberately
    #    ignored (best effort), and only a non-empty string is accepted so an
    #    empty/None answer falls through to the local heuristic.
    if analyzer is not None:
        try:
            guessed = analyzer.get_field_meaning(field_name, table_name)
        except Exception:
            guessed = None
        if isinstance(guessed, str) and guessed.strip():
            return guessed

    # 5) Simple fallback.
    return simple_heuristic(field_name, table_name)
def guess_table_desc(analyzer, table_name: str) -> str:
    """Guess a description for a table from its (case-insensitive) name.

    Order of precedence: a known table name, SQLite/WCDB built-in tables,
    FTS tables (any name containing ``fts``), the analyzer's
    ``guess_table_function`` result when it is a non-empty string other than
    the "unknown" label, and finally a list of keyword rules. The "unknown"
    label is returned when nothing matches.

    Args:
        analyzer: Object exposing ``guess_table_function(table)``, or None.
        table_name: Name of the table to describe.
    """
    unknown_label = "未知功能表"
    lowered = table_name.lower()

    # Known table names take priority.
    try:
        return KNOWN_TABLE_DESCRIPTIONS[lowered]
    except KeyError:
        pass

    # SQLite / WCDB built-ins.
    if lowered == "sqlite_sequence":
        return "SQLite 自增序列表"
    if lowered.startswith("wcdb"):
        return "WCDB 内置表(压缩/元数据等)"

    # FTS shadow tables are told apart by their suffix.
    if "fts" in lowered:
        fts_suffix_labels = (
            ("_data", "全文检索FTS内部数据表"),
            ("_idx", "全文检索FTS内部索引表"),
            ("_config", "全文检索FTS内部配置表"),
            ("_content", "全文检索FTS内部内容表"),
            ("_docsize", "全文检索FTS内部文档长度表"),
        )
        for suffix, label in fts_suffix_labels:
            if lowered.endswith(suffix):
                return label
        if lowered.endswith("_aux") or "_aux_" in lowered:
            return "全文检索FTS辅助表"
        return "全文检索FTS表/索引表"

    # Ask the analyzer, ignoring any failure and any "unknown" answer.
    if analyzer is not None:
        try:
            guessed = analyzer.guess_table_function(table_name)
        except Exception:
            guessed = None
        if isinstance(guessed, str):
            cleaned = guessed.strip()
            if cleaned and cleaned != unknown_label:
                return cleaned

    if lowered == "msg" or lowered.startswith("msg_"):
        return "某会话的消息表(聊天消息数据)"

    # Ordered keyword rules: the first rule with any keyword in the name wins.
    keyword_labels = (
        (("name2id",), "用户名到内部ID映射表"),
        (("contact",), "联系人/群聊信息表"),
        (("session",), "会话信息/未读统计表"),
        (("resource",), "消息资源/附件索引表"),
        (("voice",), "语音相关数据表"),
        (("image", "img"), "图片相关数据表"),
        (("video",), "视频相关数据表"),
        (("file",), "文件相关数据表"),
        (("sns",), "朋友圈相关数据表"),
    )
    for keywords, label in keyword_labels:
        if any(keyword in lowered for keyword in keywords):
            return label

    return unknown_label
def _guess_db_desc(db_name: str, db_desc_map: dict[str, str]) -> str:
    """Pick a description for a database name, falling back step by step."""
    # 1) Exact mapping first.
    if db_name in db_desc_map:
        return db_desc_map[db_name]
    # 2) Common shard variant: message_{n}.db
    m = re.match(r"^message_(\d+)$", db_name)
    if m:
        return f"聊天记录数据库分片message_{m.group(1)}.db"
    # 3) Official-account message shards: biz_message_{n}.db
    m = re.match(r"^biz_message_(\d+)$", db_name)
    if m:
        return f"公众号消息记录数据库biz_message_{m.group(1)}.db结构通常同 message_{m.group(1)}.db"
    # 4) FTS / index databases: *_fts
    if db_name.endswith("_fts"):
        return "全文索引数据库FTS"
    # 5) Degrade to the prefix before the first underscore.
    base = db_name.split("_", 1)[0]
    if base in db_desc_map:
        return db_desc_map[base]
    return "未知用途数据库"


def _fill_tables(analyzer, tables) -> None:
    """Fill missing table descriptions and field meanings in place."""
    if not isinstance(tables, dict):
        return
    for table_name, table in tables.items():
        if not isinstance(table, dict):
            continue
        if not table.get("description"):
            table["description"] = guess_table_desc(analyzer, table_name)
        fields = table.get("fields")
        if not isinstance(fields, dict):
            continue
        for field_name, field_meta in fields.items():
            if not isinstance(field_meta, dict):
                continue
            if not field_meta.get("meaning", ""):
                field_meta["meaning"] = compute_field_meaning(analyzer, table_name, field_name)


def fill_config(template: dict) -> dict:
    """Complete a config template with descriptions, meanings and metadata.

    The template is modified in place and the same dict is returned:

    * every database without a ``description`` gets a guessed one;
    * every table without a ``description`` and every field without a
      ``meaning`` is filled in; values that are already set are kept;
    * the built-in message-type table is merged into ``message_types``
      (built-in keys overwrite same-named template keys, other keys stay);
    * ``_metadata`` gets ``version``, ``generated_time`` and ``description``.

    Sections whose value is missing, ``null`` or not a dict are skipped
    (``databases``, ``tables``, ``fields``) or replaced by a new dict
    (``message_types``, ``_metadata``) instead of raising.

    Args:
        template: Parsed template JSON.

    Returns:
        The same ``template`` object, now filled in.
    """
    # An analyzer instance is created only for its heuristics. It is pointed at
    # a config file name that is not expected to exist (the original note says
    # this makes it use its default configuration); any failure disables it.
    analyzer = None
    if AnalyzerCls is not None:
        try:
            analyzer = AnalyzerCls(
                databases_path=str(ROOT / "output" / "databases"),
                config_file="nonexistent_config.json",
            )
        except Exception:
            analyzer = None

    # Database / table / field descriptions.
    db_desc_map = build_db_descriptions()
    databases = template.get("databases")
    if isinstance(databases, dict):
        for db_name, db in databases.items():
            if not isinstance(db, dict):
                continue
            if not db.get("description"):
                db["description"] = _guess_db_desc(db_name, db_desc_map)
            _fill_tables(analyzer, db.get("tables"))

    # Message-type mapping: keep the template's own (instructional) keys and
    # add the real mapping keys on top.
    message_types = template.get("message_types")
    if not isinstance(message_types, dict):
        message_types = {}
    message_types.update(build_message_types_from_ohmywechat())
    template["message_types"] = message_types

    # Refresh metadata.
    meta = template.get("_metadata")
    if not isinstance(meta, dict):
        meta = {}
    meta["version"] = "1.1"
    meta["generated_time"] = datetime.now().isoformat()
    meta["description"] = "微信数据库字段配置由模板自动补全融合启发式与ohmywechat常见类型"
    template["_metadata"] = meta
    return template
def main():
    """Generate the config from the template and report a short summary.

    Loads ``TEMPLATE_PATH``, runs ``fill_config`` on it, writes the result to
    ``OUTPUT_MAIN`` and then to ``OUTPUT_COPY`` (creating ``OUTPUT_DIR`` if
    needed), and prints the output paths plus database / table /
    message-type counts.

    Raises:
        FileNotFoundError: If the template file does not exist.
    """
    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(f"Template not found: {TEMPLATE_PATH}")

    with TEMPLATE_PATH.open("r", encoding="utf-8") as src:
        filled = fill_config(json.load(src))

    def write_json(target):
        # Same serialisation settings for both output files.
        with target.open("w", encoding="utf-8") as dst:
            json.dump(filled, dst, ensure_ascii=False, indent=2)

    # Main config first (loaded by the analyzer by default), then the copy
    # under output/configs.
    write_json(OUTPUT_MAIN)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    write_json(OUTPUT_COPY)

    print("[OK] 生成完成")
    print(f"- 主配置: {OUTPUT_MAIN}")
    print(f"- 备份: {OUTPUT_COPY}")

    # Brief statistics.
    dbs = filled.get("databases", {})
    db_count = len(dbs)
    tbl_count = 0
    for entry in dbs.values():
        if isinstance(entry, dict):
            tbl_count += len(entry.get("tables", {}))
    type_count = len(filled.get("message_types", {}))
    print(f"- 数据库数: {db_count}, 表数: {tbl_count}")
    print(f"- 消息类型键数: {type_count}")


if __name__ == "__main__":
    main()