chore(tools): 添加解密与资源调试脚本

- 增加解密/资源/表情/媒体定位等调试脚本，便于本地排查与验证
2026-02-19 14:20:51 +08:00 · 2025-12-17 16:59:49 +08:00
parent 1583c28ebe
commit ebc68de8a8
10 changed files with 1274 additions and 0 deletions
--- a/tools/debug_emoji_content.py
+++ b/tools/debug_emoji_content.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""调试表情消息内容"""
+
+import sqlite3
+from pathlib import Path
+
+db_path = Path(r'd:\abc\PycharmProjects\WeChatDataAnalysis\output\databases\wxid_v4mbduwqtzpt22')
+msg_dbs = list(db_path.glob('message_*.db'))
+print(f'Found {len(msg_dbs)} message databases')
+
+for db in msg_dbs[:1]:
+    print(f'\nDatabase: {db.name}')
+    conn = sqlite3.connect(str(db))
+    conn.row_factory = sqlite3.Row
+    
+    # 先查看表结构
+    tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
+    print(f'Tables: {[t[0] for t in tables]}')
+    
+    # 找到消息表
+    for t in tables:
+        tname = t[0]
+        if 'msg' in tname.lower():
+            # 查看列名
+            cols = conn.execute(f"PRAGMA table_info({tname})").fetchall()
+            col_names = [c[1] for c in cols]
+            print(f'Table {tname} columns: {col_names}')
+            
+            # 查找 type=47 的消息
+            type_col = 'local_type' if 'local_type' in col_names else 'type'
+            content_col = 'message_content' if 'message_content' in col_names else 'content'
+            compress_col = 'compress_content' if 'compress_content' in col_names else None
+            
+            query = f"SELECT * FROM {tname} WHERE {type_col} = 47 LIMIT 3"
+            try:
+                rows = conn.execute(query).fetchall()
+                print(f'Found {len(rows)} emoji messages')
+                import zstandard as zstd
+                for r in rows:
+                    d = dict(r)
+                    content = d.get('message_content') or d.get('content') or b''
+                    
+                    # 尝试解压 message_content
+                    if isinstance(content, bytes) and content.startswith(b'\x28\xb5\x2f\xfd'):
+                        try:
+                            dctx = zstd.ZstdDecompressor()
+                            content = dctx.decompress(content).decode('utf-8', errors='replace')
+                        except Exception as e:
+                            print(f'  zstd decompress message_content failed: {e}')
+                    
+                    print(f'  Decompressed content (first 800):')
+                    print(f'  {str(content)[:800]}')
+                    
+                    # 提取 md5 和 cdnurl
+                    import re
+                    md5_match = re.search(r'md5="([^"]+)"', str(content))
+                    cdnurl_match = re.search(r'cdnurl="([^"]+)"', str(content))
+                    thumburl_match = re.search(r'thumburl="([^"]+)"', str(content))
+                    
+                    print(f'  md5: {md5_match.group(1) if md5_match else "NOT FOUND"}')
+                    print(f'  cdnurl: {cdnurl_match.group(1)[:80] if cdnurl_match else "NOT FOUND"}')
+                    print(f'  thumburl: {thumburl_match.group(1)[:80] if thumburl_match else "NOT FOUND"}')
+                    break
+            except Exception as e:
+                print(f'Query failed: {e}')
+    conn.close()