mirror of
https://github.com/LifeArchiveProject/WeChatDataAnalysis.git
synced 2026-02-19 22:30:49 +08:00
improvement(tools): 增强配置模板与字段含义生成
- generate_config_template: 增加 CLI 参数；FTS/PRAGMA 失败时从建表 SQL 兜底解析列
- generate_wechat_db_config: 扩充库描述/字段含义词典，并支持从 tools/ 目录运行
- 新增 export_database_schema_markdown：基于 wechat_db_config.json 导出 Markdown 文档
This commit is contained in:
@@ -14,6 +14,7 @@ import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
TEMPLATE_PATH = ROOT / "wechat_db_config_template.json"
|
||||
@@ -21,6 +22,10 @@ OUTPUT_MAIN = ROOT / "wechat_db_config.json"
|
||||
OUTPUT_DIR = ROOT / "output" / "configs"
|
||||
OUTPUT_COPY = OUTPUT_DIR / "wechat_db_config.generated.json"
|
||||
|
||||
# 允许从 tools/ 目录运行时仍能 import 根目录模块
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
# 尝试导入分析器以复用其启发式
|
||||
AnalyzerCls = None
|
||||
try:
|
||||
@@ -33,19 +38,24 @@ except Exception:
|
||||
def build_db_descriptions() -> dict[str, str]:
    """Return the exact-match descriptions for known database stems.

    Keys are database file stems (file name without ``.db``); values are the
    human-readable descriptions written into the generated config.

    Numbered chat shards (``message_{n}``) are deliberately NOT listed here.
    A hard-coded sample shard such as ``message_3`` would take precedence over
    the pattern-based shard handling and give that one shard a different
    description from its siblings.
    """
    return {
        "message": "聊天记录核心数据库",
        # message_{n}.db is handled separately in fill_config via a regex
        # (shards / split tables), so no individual shard is listed here.
        "message_fts": "聊天消息全文索引数据库（FTS）",
        "message_resource": "消息资源索引数据库（图片/文件/视频等）",
        "contact": "联系人数据库（好友/群/公众号基础信息）",
        "session": "会话数据库（会话列表与未读统计）",
        "sns": "朋友圈数据库（动态与互动）",
        "favorite": "收藏数据库",
        "favorite_fts": "收藏全文索引数据库（FTS）",
        "emoticon": "表情包数据库",
        "head_image": "头像数据数据库",
        "hardlink": "硬链接索引数据库（资源去重/快速定位）",
        "media_0": "媒体数据数据库（含语音SILK等）",
        "unspportmsg": "不支持消息数据库（客户端不支持的消息类型）",
        "general": "通用/系统数据库（新消息通知/支付等）",
        "contact_fts": "联系人全文索引数据库（FTS）",
        "chat_search_index": "（本项目生成）聊天记录全文检索索引库（FTS5，用于搜索）",
        "bizchat": "公众号/企业微信相关数据库（会话/联系人等）",
        "digital_twin": "（本项目生成）数字分身数据库（派生数据，非微信原始库）",
    }
|
||||
|
||||
|
||||
@@ -172,6 +182,12 @@ KNOWN_FIELD_MEANINGS = {
|
||||
"c4": "FTS列c4(内部结构)",
|
||||
"c5": "FTS列c5(内部结构)",
|
||||
"c6": "FTS列c6(内部结构)",
|
||||
"c7": "FTS列c7(内部结构)",
|
||||
"c8": "FTS列c8(内部结构)",
|
||||
"c9": "FTS列c9(内部结构)",
|
||||
"c10": "FTS列c10(内部结构)",
|
||||
"c11": "FTS列c11(内部结构)",
|
||||
"c12": "FTS列c12(内部结构)",
|
||||
"sz": "FTS文档大小信息",
|
||||
"_rowid_": "SQLite内部行ID",
|
||||
|
||||
@@ -199,12 +215,483 @@ KNOWN_FIELD_MEANINGS = {
|
||||
"last_sender_display_name": "最后一条消息发送者显示名",
|
||||
"last_msg_ext_type": "最后一条消息扩展类型",
|
||||
|
||||
# 常见“Key-Value”配置表(多库复用)
|
||||
"key": "键(Key-Value配置表)",
|
||||
"valueint64": "整数值(int64)",
|
||||
"valuedouble": "浮点值(double)",
|
||||
"valuestdstr": "字符串值(std::string)",
|
||||
"valueblob": "二进制值(blob)",
|
||||
"k": "配置键(k)",
|
||||
"v": "配置值(v)",
|
||||
|
||||
# 常见保留字段
|
||||
"reserved0": "保留字段(reserved0)",
|
||||
"reserved1": "保留字段(reserved1)",
|
||||
"reserved2": "保留字段(reserved2)",
|
||||
"reserved3": "保留字段(reserved3)",
|
||||
|
||||
# 版本/位标志
|
||||
"version": "版本号(记录/结构版本,具体含义依表而定)",
|
||||
"bit_flag": "位标志/开关(bit flags)",
|
||||
|
||||
# 本项目索引/缓存库常见字段
|
||||
"render_type": "渲染类型(本项目定义:text/image/system/...)",
|
||||
"db_stem": "来源数据库分片名(如 message_0)",
|
||||
"table_name": "来源表名(如 Msg_xxx)",
|
||||
"sender_username": "发送者username(解码后)",
|
||||
"preview": "会话预览文本(用于会话列表展示)",
|
||||
"built_at": "构建时间(Unix时间戳,秒)",
|
||||
"tablename": "表名(tableName)",
|
||||
"value": "值(value)",
|
||||
"brand_user_name": "品牌/公众号username(brand_user_name)",
|
||||
|
||||
# 常见业务字段(命名自解释)
|
||||
"ticket": "票据/验证ticket(ticket)",
|
||||
"delete_table_name": "删除记录关联的消息表名(delete_table_name)",
|
||||
"res_path": "资源路径(res_path)",
|
||||
"biz_username": "公众号username(biz_username)",
|
||||
"search_key": "搜索键/索引字段(search_key)",
|
||||
"click_type": "点击/热词类型(click_type)",
|
||||
"a_group_remark": "群备注(FTS检索字段:a_group_remark)",
|
||||
"op_code": "操作码(op_code)",
|
||||
"query": "查询关键词(query)",
|
||||
"score": "评分/权重(score)",
|
||||
"keyword": "关键词(keyword)",
|
||||
"pay_load_": "payload/扩展数据(pay_load_)",
|
||||
"bill_no": "账单号(bill_no)",
|
||||
"session_title": "会话标题(session_title)",
|
||||
"unread_stat": "未读统计字段(unread_stat)",
|
||||
"ui_type": "UI类型/发布类型(ui_type)",
|
||||
"error_type": "错误类型(error_type)",
|
||||
"tips_content": "提示内容(tips_content)",
|
||||
"record_content": "记录内容(record_content)",
|
||||
"business_type": "业务类型(business_type)",
|
||||
"access_content_key": "访问内容key(access_content_key)",
|
||||
"access_content_type": "访问内容类型(access_content_type)",
|
||||
"range_type": "范围类型(range_type)",
|
||||
"message_local_type": "消息类型(message_local_type)",
|
||||
"message_origin_source": "消息来源标识(message_origin_source)",
|
||||
|
||||
# 朋友圈(sns)常见拆分字段
|
||||
"tid_heigh_bit": "tid 高位拆分字段(heigh_bit,字段名原样保留)",
|
||||
"tid_low_bit": "tid 低位拆分字段(low_bit)",
|
||||
"break_flag": "断点/分页标志(0/1;用于分页/增量拉取水位)",
|
||||
|
||||
# WCDB 压缩控制
|
||||
"WCDB_CT_message_content": "WCDB压缩标记(message_content列)",
|
||||
"WCDB_CT_source": "WCDB压缩标记(source列)",
|
||||
}
|
||||
|
||||
|
||||
# Field meanings shared by the "contact" and "stranger" entries below. The two
# entries document the same field names and differ only in the wording of
# "username", so they are derived from this single mapping to stay in sync.
_CONTACT_LIKE_FIELD_MEANINGS: dict[str, str] = {
    "id": "序号（通常与 name2id.rowid 对应）",
    "username": "联系人的 wxid / 群聊 username（可唯一确定联系人）",
    "local_type": "联系人类型：1=通讯录好友/公众号/已添加群聊；2=未添加到通讯录的群聊；3=群中的陌生人；5=企业微信好友；6=群聊中的陌生企业微信好友",
    "alias": "微信号（微信里显示的微信号）",
    "flag": "联系人标志位（需转二进制；常见：第7位星标，第12位置顶，第17位屏蔽朋友圈，第24位仅聊天）",
    "head_img_md5": "头像md5（可通过 head_image.db 查询对应头像）",
    "verify_flag": "认证标志（公众号/企业等；非0常表示公众号）",
    "description": "描述字段（样本为空；用途待确认）",
    "extra_buffer": "好友扩展信息（protobuf；包含性别/地区/签名等，本项目解析 gender/signature/country/province/city/source_scene）",
    "chat_room_notify": "群消息通知相关设置（样本为0/1；疑似免打扰/通知开关，待确认）",
    "is_in_chat_room": "群聊状态标记（样本为1/2；具体含义待确认）",
    "chat_room_type": "群聊类型/标志（样本为0/2；具体含义待确认）",
}

# Per-table overrides of field meanings (take precedence over KNOWN_FIELD_MEANINGS).
# key: table_name.lower() ; value: { field_name.lower(): meaning }
KNOWN_FIELD_MEANINGS_BY_TABLE: dict[str, dict[str, str]] = {
    # contact.db
    "contact": dict(_CONTACT_LIKE_FIELD_MEANINGS),
    "stranger": {
        **_CONTACT_LIKE_FIELD_MEANINGS,
        # Same position in the mapping, shorter wording than the contact entry.
        "username": "联系人的 wxid / 群聊 username",
    },
    "biz_info": {
        "id": "序号（与 name2id.rowid 对应，可唯一确定一个公众号）",
        "username": "公众号username（原始 wxid/gh_xxx）",
        "type": "公众号类型：1=公众号，0=订阅号（资料来源：万字长文）",
        "accept_type": "接收类型（accept_type；含义待确认，样本常为0）",
        "child_type": "子类型（child_type；含义待确认，样本常为0）",
        "version": "版本号（含义待确认，样本常为0）",
        "external_info": "公众号详细信息（常见 JSON；含底部菜单/交互配置等）",
        "brand_info": "公众号品牌/菜单信息（常见 JSON：urls 等）",
        "brand_list": "品牌列表/关联列表（格式待确认，可能为 JSON）",
        "brand_flag": "品牌/能力标志位（含义待确认）",
        "belong": "归属字段（含义待确认）",
        "home_url": "主页链接（含义待确认）",
    },
    "chat_room": {
        "id": "序号（与 name2id.rowid 对应）",
        "username": "群聊的username（xxx@chatroom）",
        "owner": "群主username",
        "ext_buffer": "群成员username与群昵称（protobuf：ChatRoomData.members 等）",
    },
    "chat_room_info_detail": {
        "room_id_": "序号（与 name2id.rowid 对应）",
        "username_": "群聊的username（xxx@chatroom）",
        "announcement_": "群公告（文本）",
        "announcement_editor_": "群公告编辑者username",
        "announcement_publish_time_": "群公告发布时间（时间戳）",
        "chat_room_status_": "群状态/标志位（bitmask；样本常见 0x80000 等，具体位含义待确认）",
        "xml_announcement_": "群公告（XML，可解析更多信息：图片/文件等）",
        "ext_buffer_": "扩展信息（protobuf-like；样本长度较小，具体结构待确认）",
    },
    "chatroom_member": {
        "room_id": "群聊ID（对应 name2id.rowid）",
        "member_id": "群成员ID（对应 name2id.rowid）",
    },
    "contact_label": {
        "label_id_": "标签ID",
        "label_name_": "标签名称",
        "sort_order_": "排序",
    },

    # message_*.db / biz_message_*.db
    # NOTE(review): the visible lookup in compute_field_meaning is an exact
    # .get(table_name.lower()); confirm that per-conversation tables reach it
    # under the literal name "msg_*", otherwise this entry is never matched.
    "msg_*": {
        "local_id": "自增id（本地）",
        "server_id": "服务端id（每条消息唯一）",
        "local_type": "消息类型（local_type；低32位=type，高32位=sub_type；可用 (local_type & 0xFFFFFFFF) 与 (local_type >> 32) 拆分）",
        "sort_seq": "排序字段（单会话内消息排序；样本≈create_time*1000）",
        "real_sender_id": "发送者id（可通过 Name2Id.rowid 映射到 username）",
        "create_time": "秒级时间戳",
        "server_seq": "服务端接收顺序id（server_seq）",
        "message_content": "消息内容：local_type=1 时为文本，其它类型多为 Zstandard 压缩后的XML/二进制",
        "compress_content": "压缩后的内容（多见 Zstandard）",
        "packed_info_data": "protobuf扩展信息（图片文件名/语音转文字/合并转发文件夹名等）",
    },
    "name2id": {
        "is_session": "是否会话名标记（1=会话/聊天对象；0=其它映射，如群成员ID）",
    },

    # session.db
    "sessiontable": {
        "type": "会话类型（样本为0；枚举待确认）",
        "status": "会话状态（样本为0；枚举待确认）",
        "unread_first_pat_msg_local_id": "未读拍一拍消息的本地ID（样本为0；含义待确认）",
        "unread_first_pat_msg_sort_seq": "未读拍一拍消息的排序序号（样本为0；含义待确认）",
    },
    "session_last_message": {
        "username": "会话username",
        "sort_seq": "最后一条消息sort_seq",
        "local_id": "最后一条消息local_id",
        "create_time": "最后一条消息create_time（秒级时间戳）",
        "local_type": "最后一条消息local_type",
        "sender_username": "最后一条消息发送者username",
        "preview": "最后一条消息预览文本（用于会话列表）",
        "db_stem": "来源消息库分片名（如 message_0）",
        "table_name": "来源消息表名（如 Msg_xxx）",
        "built_at": "构建时间（Unix时间戳，秒）",
    },

    # chat_search_index.db (generated by this project)
    "message_fts": {
        "text": "可检索文本（索引内容）",
        "render_type": "渲染类型（text/system/image/voice/video/emoji/...，本项目定义）",
        "db_stem": "来源消息库分片名（如 message_0）",
        "table_name": "来源消息表名（如 Msg_xxx）",
        "sender_username": "发送者username（解码后）",
    },

    # emoticon.db
    "knonstoreemoticontable": {
        "type": "表情类型（样本均为3；枚举含义待确认）",
        "caption": "表情说明/标题（caption）",
        "product_id": "表情包/产品ID（product_id）",
        "aes_key": "AES密钥（用于CDN下载解密）",
        "auth_key": "鉴权key（CDN下载）",
        "extern_md5": "外部资源md5（extern_md5）",
    },
    "kstoreemoticonpackagetable": {
        "package_id_": "表情包ID（package_id）",
        "package_name_": "表情包名称",
        "payment_status_": "支付状态（payment_status）",
        "download_status_": "下载状态（download_status）",
        "install_time_": "安装时间（时间戳）",
        "remove_time_": "移除时间（时间戳）",
        "sort_order_": "排序",
        "introduction_": "简介（introduction）",
        "full_description_": "完整描述（full_description）",
        "copyright_": "版权信息",
        "author_": "作者信息",
        "store_icon_url_": "商店图标URL",
        "panel_url_": "面板/详情页URL",
    },
    "kstoreemoticonfilestable": {
        "package_id_": "表情包ID（package_id）",
        "md5_": "表情md5",
        "type_": "表情类型（type）",
        "sort_order_": "排序",
        "emoticon_size_": "表情文件大小（字节）",
        "emoticon_offset_": "表情文件偏移（用于包内定位）",
        "thumb_size_": "缩略图大小（字节）",
        "thumb_offset_": "缩略图偏移（用于包内定位）",
    },

    # favorite.db
    "fav_db_item": {
        "version": "版本号（收藏条目结构/内容版本；样本为87）",
        "fromusr": "来源用户username（收藏来源）",
        "realchatname": "来源群聊username（若收藏来源于群聊）",
        "upload_error_code": "上传错误码",
        "trans_res_error_code": "资源转换错误码（trans_res_error_code）",
    },

    # general.db
    "ilink_voip": {
        "wx_chatroom_": "群聊username（xxx@chatroom）",
        "millsecond_": "毫秒时间戳/时间标记（字段名推断）",
        "group_id_": "ILink group_id（字段名推断）",
        "room_id_": "房间ID（字段名推断）",
        "room_key_": "房间key（字段名推断）",
        "route_id_": "路由ID（字段名推断）",
        "voice_status_": "通话状态（字段名推断）",
        "talker_create_user_": "发起者username（字段名推断）",
        "not_friend_user_list_": "非好友成员列表（字段名推断）",
        "members_": "成员列表（字段名推断）",
        "is_ilink_": "是否ilink通话（字段名推断）",
        "ever_quit_chatroom_": "是否曾退出群聊（字段名推断）",
    },
    "fmessagetable": {
        "user_name_": "用户名（好友验证/陌生人会话用户名）",
        "type_": "消息类型（好友验证/系统消息；样本为37）",
        "timestamp_": "时间戳",
        "encrypt_user_name_": "加密用户名",
        "content_": "内容（验证消息/系统提示等）",
        "is_sender_": "是否发送方（is_sender）",
        "ticket_": "票据/验证ticket",
        "scene_": "来源场景码（scene）",
        "fmessage_detail_buf_": "详细信息（protobuf-like；包含验证文案/来源等信息）",
    },
    "handoff_remind_v0": {
        "item_id": "条目ID（item_id）",
        "head_icon": "图标（URL/资源标识）",
        "title": "标题",
        "desc_type": "描述类型（desc_type）",
        "create_time": "创建时间（时间戳）",
        "start_time": "开始时间（时间戳）",
        "expire_time": "过期时间（时间戳）",
        "biz_type": "业务类型（biz_type）",
        "version": "版本号（version）",
        "url": "跳转URL",
        "extra_info": "扩展信息（extra_info）",
    },
    "transfertable": {
        "transfer_id": "转账ID（transfer_id）",
        # The key keeps the column's original (misspelled) name on purpose.
        "transcation_id": "交易ID（transaction_id，原字段拼写保留）",
        "message_server_id": "关联消息server_id",
        "second_message_server_id": "关联第二条转账消息server_id（可在 message_*.db::Msg_* 表的 server_id 对应到）",
        "session_name": "会话username",
        "pay_sub_type": "支付子类型（pay_sub_type）",
        "pay_receiver": "收款方username",
        "pay_payer": "付款方username",
        "begin_transfer_time": "转账开始时间（时间戳）",
        "last_modified_time": "最后修改时间（时间戳）",
        "invalid_time": "失效时间（时间戳）",
        "last_update_time": "最后更新时间（时间戳）",
        "delay_confirm_flag": "延迟确认标志（delay_confirm_flag）",
        "bubble_clicked_flag": "气泡点击标志（bubble_clicked_flag）",
    },

    # bizchat.db
    "chat_group": {
        "brand_user_name": "品牌/公众号username（brand_user_name）",
        "bit_flag": "位标志/开关（bit_flag）",
        "chat_name": "群组名称（chat_name）",
        "user_list": "成员列表（常见为 ; 分隔的 user_id/username 列表；待确认）",
        "reserved0": "保留字段（reserved0）",
        "reserved1": "保留字段（reserved1）",
        "reserved2": "保留字段（reserved2）",
        "reserved3": "保留字段（reserved3）",
    },
    "user_info": {
        "brand_user_name": "品牌/公众号username（brand_user_name）",
        "bit_flag": "位标志/开关（bit_flag）",
        "reserved0": "保留字段（reserved0）",
        "reserved1": "保留字段（reserved1）",
        "reserved2": "保留字段（reserved2）",
        "reserved3": "保留字段（reserved3）",
    },

    # sns.db
    "snsmessage_tmp3": {
        "from_username": "来源用户username（评论/点赞发起者）",
        "from_nickname": "来源用户昵称（评论/点赞发起者）",
        "to_username": "目标用户username（被回复/被@的人）",
        "to_nickname": "目标用户昵称（被回复/被@的人）",
        "comment_flag": "评论标志位（样本为0；具体 bit 含义待确认）",
    },
    "snsadtimeline": {
        "ad_content": "广告内容（ad_content，格式待确认）",
        "remind_source_info": "提醒来源信息（remind_source_info，格式待确认）",
        "remind_self_info": "提醒自身信息（remind_self_info，格式待确认）",
        "extra_data": "扩展数据（extra_data，格式待确认）",
    },

    # unspportmsg.db
    "unsupportmessage": {
        "from_user": "发送者username",
        "to_user": "接收者username",
        "msg_source": "消息来源附加信息（msg_source）",
    },

    # contact.db
    "openim_wording": {
        "wording": "文案/提示语（wording）",
        "pinyin": "拼音（pinyin）",
    },

    # message_*.db / biz_message_*.db (WCDB)
    "wcdb_builtin_compression_record": {
        "tablename": "表名（tableName）",
        "columns": "被WCDB压缩的列列表（columns）",
    },

    # general.db
    "revokemessage": {
        "to_user_name": "会话username（撤回消息所在会话）",
        "message_type": "消息类型（local_type）",
        "at_user_list": "@用户列表（字段名推断）",
    },
    "wcfinderlivestatus": {
        "finder_username": "视频号作者username（finder_username）",
        "charge_flag": "是否付费/收费标志（charge_flag）",
    },
    "new_tips": {
        "disable": "禁用标志（disable）",
        "new_tips_content": "提示内容（new_tips_content）",
    },
    "redenvelopetable": {
        "sender_user_name": "红包发送者username",
        "hb_type": "红包类型（hb_type）",
    },
    "wacontact": {
        "external_info": "外部信息（JSON；常见包含 BindWxaInfo/RegisterSource/WxaAppDynamic 等）",
        "contact_pack_data": "联系人打包数据（protobuf-like；常含昵称/品牌名等）",
        "wx_app_opt": "小程序/应用选项（wx_app_opt；位标志/开关；样本为0）",
    },

    # emoticon.db
    "kstoreemoticoncaptionstable": {
        "package_id_": "表情包ID（package_id）",
        "md5_": "表情md5",
        "language_": "语言（language）",
        "caption_": "文案/标题（caption）",
    },
}
|
||||
|
||||
|
||||
# Descriptions for well-known table names. Keys are lower-case table names
# (guess_table_desc looks them up with table_name.lower()); values are the
# description text emitted for that table.
KNOWN_TABLE_DESCRIPTIONS: dict[str, str] = {
    # ---- contact.db ----
    "biz_info": "公众号信息表（公众号类型/菜单/品牌信息等）",
    "chat_room": "群聊基础信息表（群主/成员列表等扩展在 ext_buffer）",
    "chat_room_info_detail": "群聊详细信息表（群公告/群状态等）",
    "chatroom_member": "群聊成员映射表（room_id ↔ member_id）",
    "contact": "联系人核心表（好友/群/公众号等基础信息）",
    "contact_label": "联系人标签表（标签ID与名称）",
    "name2id": "用户名（wxid/群id@chatroom 等）到内部数值ID映射表",
    "encrypt_name2id": "加密用户名到内部数值ID映射表",
    "stranger": "陌生人/临时会话信息表",
    "ticket_info": "票据/会话票据信息表（用途待进一步确认）",
    "stranger_ticket_info": "陌生人票据信息表（用途待进一步确认）",
    "oplog": "操作/同步日志表（增量同步相关）",
    "openim_appid": "OpenIM 应用ID表（企业微信/互通相关）",
    "openim_acct_type": "OpenIM 账号类型表",
    "openim_wording": "OpenIM 文案/提示语表",

    # ---- session.db ----
    "sessiontable": "会话列表表（会话展示/未读/置顶/隐藏等）",
    "sessiondeletetable": "会话删除记录表",
    "sessionunreadlisttable_1": "未读会话列表表（分表）",
    "sessionunreadstattable_1": "未读统计表（分表）",
    "sessionnocontactinfotable": "会话表（无联系人信息的会话）",
    "session_last_message": "会话最后一条消息缓存/索引表（版本/实现差异）",

    # ---- message_*.db / biz_message_*.db ----
    "timestamp": "时间戳/增量同步辅助表",
    "deleteinfo": "删除消息记录表（删除/撤回相关）",
    "deleteresinfo": "删除资源记录表（资源删除相关）",
    "sendinfo": "发送相关信息表（发送状态/队列等）",
    "historysysmsginfo": "历史系统消息表",
    "historyaddmsginfo": "历史新增消息表",

    # ---- message_resource.db ----
    "chatname2id": "会话名 → 会话ID 映射表（资源库维度）",
    "sendername2id": "发送者名 → 发送者ID 映射表（资源库维度）",
    "messageresourceinfo": "消息资源索引表（按消息/会话定位资源）",
    "messageresourcedetail": "消息资源明细表（md5/路径/大小等）",
    "ftsrange": "FTS 范围信息表（搜索/索引辅助）",
    "ftsdeleteinfo": "FTS 删除记录表（索引维护）",

    # ---- media_0.db ----
    "voiceinfo": "语音数据表（voice_data 等）",

    # ---- hardlink.db ----
    "db_info": "WCDB Key-Value 元信息表（FTS构建状态/版本/扫描时间等）",
    "dir2id": "目录 → ID 映射表（硬链接索引）",
    "image_hardlink_info_v4": "图片硬链接索引表（v4）",
    "file_hardlink_info_v4": "文件硬链接索引表（v4）",
    "video_hardlink_info_v4": "视频硬链接索引表（v4）",
    "file_checkpoint_v4": "文件索引检查点（增量）",
    "video_checkpoint_v4": "视频索引检查点（增量）",
    "talker_checkpoint_v4": "会话索引检查点（增量）",

    # ---- *_fts.db / message_fts.db ----
    "table_info": "WCDB Key-Value 元信息表（索引范围/水位/时间戳等）",

    # ---- head_image.db ----
    "head_image": "头像缓存表（头像 md5/二进制缩略图等）",

    # ---- favorite.db ----
    "buff": "WCDB Key-Value 缓冲/配置表（收藏等模块的缓存）",
    "fav_db_item": "收藏条目表",
    "fav_tag_db_item": "收藏标签表",
    "fav_bind_tag_db_item": "收藏条目与标签绑定表",

    # ---- emoticon.db ----
    "kcustomemoticonordertable": "自定义表情排序表（md5 列表）",
    "kexpressrecentuseeemoticontable": "最近使用表情记录（Key-Value）",
    "knonstoreemoticontable": "非商店表情表（用户收藏/外部表情资源；含CDN下载信息）",
    "kstoreemoticonpackagetable": "商店表情包信息表（package 元数据）",
    "kstoreemoticoncaptionstable": "商店表情文案表（多语言 caption）",

    # ---- unspportmsg.db ----
    "unsupportmessage": "不支持消息表（PC端无法直接展示的消息类型）",

    # ---- bizchat.db ----
    "chat_group": "BizChat 群组表（企业微信/公众号群组信息）",
    "user_info": "BizChat 用户表（企业微信/公众号用户信息）",
    "my_user_info": "BizChat 当前账号映射表（brand_user_name ↔ user_id）",

    # ---- general.db ----
    "forwardrecent": "最近转发会话记录表（username/时间）",
    "transfertable": "转账记录表（转账ID/关联消息/状态等）",
    "redenvelopetable": "红包记录表（关联消息/状态等）",
    "ilink_voip": "iLink/群通话相关表（房间ID/成员/状态等）",
    "fmessagetable": "好友验证/陌生人消息表（FMessage）",
    "handoff_remind_v0": "跨设备接力/提醒项表（handoff_remind_v0）",
    "biz_pay_status": "公众号文章付费状态表（url_id/is_paid 等）",
    "biz_subscribe_status": "公众号订阅模板状态表（template_id/is_subscribe）",
    "new_tips": "新提示/新功能提示表",
    "reddot": "小红点提示表",
    "reddot_record": "小红点记录表",
    "wcfinderlivestatus": "视频号直播状态表",
    "teenager_apply_access_agree_info": "青少年模式访问同意记录表",

    # ---- chat_search_index.db (generated by this project) ----
    "meta": "索引元数据表（schema_version/构建时间等）",
    "message_fts": "全文索引表（fts5，用于搜索）",
}
|
||||
|
||||
|
||||
def simple_heuristic(field_name: str, table_name: str) -> str:
|
||||
"""简易兜底启发式,避免完全空白"""
|
||||
f = field_name.lower()
|
||||
@@ -243,10 +730,17 @@ def simple_heuristic(field_name: str, table_name: str) -> str:
|
||||
|
||||
|
||||
def compute_field_meaning(analyzer, table_name: str, field_name: str) -> str:
|
||||
# 优先精确已知映射
|
||||
lt = table_name.lower()
|
||||
lf = field_name.lower()
|
||||
|
||||
# 1) 表级覆盖优先
|
||||
tmap = KNOWN_FIELD_MEANINGS_BY_TABLE.get(lt)
|
||||
if tmap and lf in tmap:
|
||||
return tmap[lf]
|
||||
|
||||
# 2) 全局精确映射
|
||||
if field_name in KNOWN_FIELD_MEANINGS:
|
||||
return KNOWN_FIELD_MEANINGS[field_name]
|
||||
lf = field_name.lower()
|
||||
if lf in KNOWN_FIELD_MEANINGS:
|
||||
return KNOWN_FIELD_MEANINGS[lf]
|
||||
|
||||
@@ -266,13 +760,44 @@ def compute_field_meaning(analyzer, table_name: str, field_name: str) -> str:
|
||||
|
||||
|
||||
def guess_table_desc(analyzer, table_name: str) -> str:
|
||||
# 简易猜测(优先命中已知表名)
|
||||
tl = table_name.lower()
|
||||
|
||||
# 已知表名(大小写不敏感)
|
||||
if tl in KNOWN_TABLE_DESCRIPTIONS:
|
||||
return KNOWN_TABLE_DESCRIPTIONS[tl]
|
||||
|
||||
# SQLite / WCDB 内置
|
||||
if tl == "sqlite_sequence":
|
||||
return "SQLite 自增序列表"
|
||||
if tl.startswith("wcdb"):
|
||||
return "WCDB 内置表(压缩/元数据等)"
|
||||
|
||||
# FTS 内部表(多为 *_data/_idx/_config/_content/_docsize/_aux)
|
||||
if "fts" in tl:
|
||||
if tl.endswith("_data"):
|
||||
return "全文检索(FTS)内部数据表"
|
||||
if tl.endswith("_idx"):
|
||||
return "全文检索(FTS)内部索引表"
|
||||
if tl.endswith("_config"):
|
||||
return "全文检索(FTS)内部配置表"
|
||||
if tl.endswith("_content"):
|
||||
return "全文检索(FTS)内部内容表"
|
||||
if tl.endswith("_docsize"):
|
||||
return "全文检索(FTS)内部文档长度表"
|
||||
if tl.endswith("_aux") or "_aux_" in tl:
|
||||
return "全文检索(FTS)辅助表"
|
||||
return "全文检索(FTS)表/索引表"
|
||||
|
||||
# 借助分析器的启发式(如果可用,且不是“未知功能表”)
|
||||
if analyzer is not None:
|
||||
try:
|
||||
return analyzer.guess_table_function(table_name)
|
||||
guessed = analyzer.guess_table_function(table_name)
|
||||
if isinstance(guessed, str) and guessed.strip() and guessed.strip() != "未知功能表":
|
||||
return guessed.strip()
|
||||
except Exception:
|
||||
pass
|
||||
# 简易猜测
|
||||
tl = table_name.lower()
|
||||
|
||||
if tl == "msg" or tl.startswith("msg_"):
|
||||
return "某会话的消息表(聊天消息数据)"
|
||||
if "name2id" in tl:
|
||||
@@ -281,10 +806,18 @@ def guess_table_desc(analyzer, table_name: str) -> str:
|
||||
return "联系人/群聊信息表"
|
||||
if "session" in tl:
|
||||
return "会话信息/未读统计表"
|
||||
if "fts" in tl:
|
||||
return "全文检索(FTS)内部表"
|
||||
if "resource" in tl:
|
||||
return "消息资源/附件索引表"
|
||||
if "voice" in tl:
|
||||
return "语音相关数据表"
|
||||
if "image" in tl or "img" in tl:
|
||||
return "图片相关数据表"
|
||||
if "video" in tl:
|
||||
return "视频相关数据表"
|
||||
if "file" in tl:
|
||||
return "文件相关数据表"
|
||||
if "sns" in tl:
|
||||
return "朋友圈相关数据表"
|
||||
return "未知功能表"
|
||||
|
||||
|
||||
@@ -301,13 +834,38 @@ def fill_config(template: dict) -> dict:
|
||||
# 数据库描述补齐
|
||||
db_desc_map = build_db_descriptions()
|
||||
|
||||
def guess_db_desc(db_name: str) -> str:
    """Pick a description for the database stem ``db_name``.

    Rules are tried in order and the first hit wins:

    1. an exact entry in ``db_desc_map`` (taken from the enclosing scope);
    2. a numbered chat shard, ``message_<digits>``;
    3. a numbered official-account shard, ``biz_message_<digits>``;
    4. any name ending in ``_fts``;
    5. the ``db_desc_map`` entry for the text before the first underscore.

    When nothing matches, a fixed "unknown purpose" label is returned.
    """
    # 1) Exact mapping has the highest priority.
    try:
        return db_desc_map[db_name]
    except KeyError:
        pass

    # 2) Common shard/variant: message_{n}.db
    if (m := re.match(r"^message_(\d+)$", db_name)) is not None:
        return f"聊天记录数据库分片（message_{m.group(1)}.db）"

    # 3) Official-account / WeCom message store: biz_message_{n}.db
    #    (described as usually sharing the layout of message_{n}.db).
    if (m := re.match(r"^biz_message_(\d+)$", db_name)) is not None:
        return f"公众号消息记录数据库（biz_message_{m.group(1)}.db，结构通常同 message_{m.group(1)}.db）"

    # 4) FTS / index stores: *_fts.db
    if db_name.endswith("_fts"):
        return "全文索引数据库（FTS）"

    # 5) Fall back to the prefix before the first underscore, then give up.
    prefix = db_name.partition("_")[0]
    return db_desc_map.get(prefix, "未知用途数据库")
|
||||
|
||||
databases = template.get("databases", {})
|
||||
for db_name, db in databases.items():
|
||||
if isinstance(db, dict):
|
||||
# 数据库级描述
|
||||
if not db.get("description"):
|
||||
# 用已知映射或尝试推断
|
||||
db["description"] = db_desc_map.get(db_name, db.get("description", "")) or "未知用途数据库"
|
||||
db["description"] = guess_db_desc(db_name)
|
||||
|
||||
# 遍历表
|
||||
tables = db.get("tables", {})
|
||||
@@ -378,4 +936,4 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user