improvement(chat): packed_info md5 提取优先匹配 .dat 文件名

- packed_info 同时包含多个 32hex 时，优先从形如 xxx_t.dat / xxx.t.dat 的文件名中提取 md5；无匹配时再回退到原有的 32hex 众数策略
- 对齐 echotrace 的选择思路，降低取错 md5 的概率
2026-02-02 22:10:50 +08:00 · 2025-12-31 11:35:51 +08:00
parent 0349d89def
commit 67358deeef
1 changed files with 16 additions and 0 deletions
--- a/src/wechat_decrypt_tool/chat_helpers.py
+++ b/src/wechat_decrypt_tool/chat_helpers.py
@@ -426,6 +426,7 @@ def _decode_message_content(compress_value: Any, message_value: Any) -> str:


 _MD5_HEX_RE = re.compile(rb"(?i)[0-9a-f]{32}")
+_DAT_MD5_RE = re.compile(rb"(?i)([0-9a-f]{32})(?:[._][thbc])?\.dat")


 def _extract_md5_from_blob(blob: Any) -> str:
@@ -443,6 +444,21 @@ def _extract_md5_from_blob(blob: Any) -> str:

    if not data:
        return ""
+
+    # Prefer md5 that appears as an actual `.dat` filename (incl. _t.dat/.t.dat variants).
+    # This matches echotrace's idea: packed_info often contains multiple 32-hex tokens, but only
+    # the one referenced by a file path is the correct on-disk basename.
+    try:
+        m2 = _DAT_MD5_RE.findall(data)
+    except Exception:
+        m2 = []
+    if m2:
+        best2 = Counter([x.lower() for x in m2]).most_common(1)[0][0]
+        try:
+            return best2.decode("ascii", errors="ignore")
+        except Exception:
+            return ""
+
    m = _MD5_HEX_RE.findall(data)
    if not m:
        return ""