fix(chat): 群聊发送者解析支持 alias 并避免覆盖 sender_username

- _split_group_sender_prefix 支持传入已知 sender(username/alias),弱特征场景避免误拆前缀

- 群聊消息按需读取 contact.db 的 alias 辅助解析昵称:\n前缀(带缓存)

- 仅在 sender 缺失时回填前缀,避免昵称覆盖 sender id
This commit is contained in:
2977094657
2026-01-04 14:03:52 +08:00
parent 03af287aea
commit ea0547eab3
3 changed files with 230 additions and 20 deletions

View File

@@ -833,15 +833,6 @@ def _iter_rows_for_conversation(
raw_text = _decode_message_content(r["compress_content"], r["message_content"]).strip()
is_group = bool(conv_username.endswith("@chatroom"))
sender_prefix = ""
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and sender_prefix:
sender_username = sender_prefix
if is_group and raw_text and (raw_text.startswith("<") or raw_text.startswith('"<')):
xml_sender = _extract_sender_from_group_xml(raw_text)
if xml_sender:
sender_username = xml_sender
if is_sent:
sender_username = account_wxid
@@ -881,8 +872,21 @@ def _parse_message_for_export(
is_group: bool,
resource_conn: Optional[sqlite3.Connection],
resource_chat_id: Optional[int],
sender_alias: str = "",
) -> dict[str, Any]:
raw_text = row.raw_text or ""
sender_username = str(row.sender_username or "").strip()
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text, sender_username, sender_alias)
if sender_prefix and (not sender_username):
sender_username = sender_prefix
if is_group and raw_text and (raw_text.startswith("<") or raw_text.startswith('"<')):
xml_sender = _extract_sender_from_group_xml(raw_text)
if xml_sender:
sender_username = xml_sender
local_type = int(row.local_type or 0)
is_sent = bool(row.is_sent)
@@ -1152,7 +1156,7 @@ def _parse_message_for_export(
"type": local_type,
"renderType": render_type,
"isSent": bool(is_sent),
"senderUsername": row.sender_username,
"senderUsername": sender_username,
"conversationUsername": conv_username,
"isGroup": bool(is_group),
"content": content_text,
@@ -1216,6 +1220,38 @@ def _write_conversation_json(
arcname = f"{conv_dir}/messages.json"
exported = 0
contact_conn: Optional[sqlite3.Connection] = None
alias_cache: dict[str, str] = {}
if conv_is_group:
try:
contact_db_path = account_dir / "contact.db"
if contact_db_path.exists():
contact_conn = sqlite3.connect(str(contact_db_path))
except Exception:
contact_conn = None
def lookup_alias(username: str) -> str:
    """Return the contact alias stored for *username*, or "" when unknown.

    Reads ``contact_conn`` and ``alias_cache`` from the enclosing scope.
    The ``contact`` table is consulted first and ``stranger`` is used as a
    fallback when no non-empty alias was found. Every outcome, including
    "no alias", is cached under the stripped username so each sender is
    looked up at most once.

    Args:
        username: Sender id to look up; ``None``/blank yields "".

    Returns:
        The stripped alias, or "" if there is no connection, no matching
        row, an empty alias, or the lookups failed.
    """
    key = str(username or "").strip()
    if not key or contact_conn is None:
        return ""
    if key in alias_cache:
        return alias_cache[key]

    alias = ""
    # Each table is queried under its own guard: a failure on ``contact``
    # (e.g. missing table/column) must not prevent the ``stranger`` fallback.
    for query in (
        "SELECT alias FROM contact WHERE username = ? LIMIT 1",
        "SELECT alias FROM stranger WHERE username = ? LIMIT 1",
    ):
        try:
            found = contact_conn.execute(query, (key,)).fetchone()
            if found is not None and found[0] is not None:
                alias = str(found[0] or "").strip()
        except Exception:
            # Deliberately best-effort: a failed lookup counts as "no alias"
            # for this table instead of aborting the caller.
            continue
        if alias:
            break

    alias_cache[key] = alias
    return alias
# NOTE: Do not keep an entry handle opened while also writing other entries (avatars/media).
# zipfile forbids interleaving writes; stream to a temp file then add it to zip at the end.
with tempfile.TemporaryDirectory(prefix="wechat_chat_export_") as tmp_dir:
@@ -1263,12 +1299,28 @@ def _write_conversation_json(
local_types=local_types,
):
scanned += 1
sender_alias = ""
if conv_is_group and row.raw_text and (not row.raw_text.startswith("<")) and (not row.raw_text.startswith('"<')):
sep = row.raw_text.find(":\n")
if sep > 0:
prefix = row.raw_text[:sep].strip()
su = str(row.sender_username or "").strip()
if prefix and su and prefix != su:
strong_hint = prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix
if not strong_hint:
body_probe = row.raw_text[sep + 2 :].lstrip("\n").lstrip()
body_is_xml = body_probe.startswith("<") or body_probe.startswith('"<')
if not body_is_xml:
sender_alias = lookup_alias(su)
msg = _parse_message_for_export(
row=row,
conv_username=conv_username,
is_group=conv_is_group,
resource_conn=resource_conn,
resource_chat_id=resource_chat_id,
sender_alias=sender_alias,
)
if want_types:
rt_key = _normalize_render_type_key(msg.get("renderType"))
@@ -1326,6 +1378,12 @@ def _write_conversation_json(
tw.flush()
zf.write(str(tmp_path), arcname)
if contact_conn is not None:
try:
contact_conn.close()
except Exception:
pass
return exported
@@ -1360,6 +1418,38 @@ def _write_conversation_txt(
arcname = f"{conv_dir}/messages.txt"
exported = 0
contact_conn: Optional[sqlite3.Connection] = None
alias_cache: dict[str, str] = {}
if conv_is_group:
try:
contact_db_path = account_dir / "contact.db"
if contact_db_path.exists():
contact_conn = sqlite3.connect(str(contact_db_path))
except Exception:
contact_conn = None
def lookup_alias(username: str) -> str:
    """Look up the alias recorded for *username* in the contact database.

    Uses the enclosing scope's ``contact_conn`` (may be ``None``) and
    ``alias_cache``. ``contact`` is tried before ``stranger``; the first
    non-empty alias wins. The result, even when empty, is stored in
    ``alias_cache`` keyed by the stripped username.

    Args:
        username: Sender id to resolve; ``None``/blank yields "".

    Returns:
        The stripped alias, or "" when nothing usable was found or the
        lookups raised.
    """
    wanted = str(username or "").strip()
    if not wanted or contact_conn is None:
        return ""
    if wanted in alias_cache:
        return alias_cache[wanted]

    resolved = ""
    # Guard each table separately so an error while reading ``contact``
    # (e.g. missing table/column) still lets ``stranger`` be consulted.
    for statement in (
        "SELECT alias FROM contact WHERE username = ? LIMIT 1",
        "SELECT alias FROM stranger WHERE username = ? LIMIT 1",
    ):
        try:
            hit = contact_conn.execute(statement, (wanted,)).fetchone()
            if hit is not None and hit[0] is not None:
                resolved = str(hit[0] or "").strip()
        except Exception:
            # Best-effort by design: treat a failing table as "no alias"
            # rather than propagating the error to the caller.
            continue
        if resolved:
            break

    alias_cache[wanted] = resolved
    return resolved
# Same as JSON: write to temp file first to avoid zip interleaving writes.
with tempfile.TemporaryDirectory(prefix="wechat_chat_export_") as tmp_dir:
tmp_path = Path(tmp_dir) / "messages.txt"
@@ -1391,12 +1481,27 @@ def _write_conversation_txt(
local_types=local_types,
):
scanned += 1
sender_alias = ""
if conv_is_group and row.raw_text and (not row.raw_text.startswith("<")) and (not row.raw_text.startswith('"<')):
sep = row.raw_text.find(":\n")
if sep > 0:
prefix = row.raw_text[:sep].strip()
su = str(row.sender_username or "").strip()
if prefix and su and prefix != su:
strong_hint = prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix
if not strong_hint:
body_probe = row.raw_text[sep + 2 :].lstrip("\n").lstrip()
body_is_xml = body_probe.startswith("<") or body_probe.startswith('"<')
if not body_is_xml:
sender_alias = lookup_alias(su)
msg = _parse_message_for_export(
row=row,
conv_username=conv_username,
is_group=conv_is_group,
resource_conn=resource_conn,
resource_chat_id=resource_chat_id,
sender_alias=sender_alias,
)
if want_types:
rt_key = _normalize_render_type_key(msg.get("renderType"))
@@ -1449,6 +1554,12 @@ def _write_conversation_txt(
tw.flush()
zf.write(str(tmp_path), arcname)
if contact_conn is not None:
try:
contact_conn.close()
except Exception:
pass
return exported

View File

@@ -694,7 +694,11 @@ def _infer_transfer_status_text(
return "转账"
def _split_group_sender_prefix(text: str) -> tuple[str, str]:
def _split_group_sender_prefix(
text: str,
known_sender_username: str = "",
known_sender_alias: str = "",
) -> tuple[str, str]:
if not text:
return "", text
sep = text.find(":\n")
@@ -706,7 +710,21 @@ def _split_group_sender_prefix(text: str) -> tuple[str, str]:
return "", text
if re.search(r"\s", prefix):
return "", text
if prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix:
strong_hint = prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix
probe = body.lstrip()
body_is_xml = probe.startswith("<") or probe.startswith('"<')
known_values = {str(known_sender_username or "").strip(), str(known_sender_alias or "").strip()}
known_values.discard("")
if known_values:
if prefix in known_values:
return prefix, body
if strong_hint or body_is_xml:
return prefix, body
return "", text
if strong_hint or body_is_xml:
return prefix, body
return "", text
@@ -1013,7 +1031,7 @@ def _build_latest_message_preview(
raw_text = (raw_text or "").strip()
sender_prefix = ""
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
sender_prefix, raw_text = _split_group_sender_prefix(raw_text, sender_username)
if is_group and (not sender_prefix) and sender_username:
sender_prefix = str(sender_username).strip()
@@ -1400,7 +1418,7 @@ def _row_to_search_hit(
sender_prefix = ""
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
sender_prefix, raw_text = _split_group_sender_prefix(raw_text, sender_username)
if is_group and sender_prefix:
sender_username = sender_prefix

View File

@@ -111,6 +111,33 @@ def _normalize_chat_source(value: Optional[str]) -> str:
raise HTTPException(status_code=400, detail="Invalid source, use 'decrypted' or 'realtime'.")
def _lookup_contact_alias(
    conn: Optional[sqlite3.Connection],
    cache: dict[str, str],
    username: str,
) -> str:
    """Return the alias stored for *username*, memoising the result in *cache*.

    The ``contact`` table is queried first; ``stranger`` is used as a fallback
    when no non-empty alias was found there. Whatever is resolved (including
    the empty string) is written to *cache* under the stripped username, so a
    given sender triggers at most one round of queries per cache.

    Args:
        conn: Open connection to the contact database, or ``None`` to
            disable lookups.
        cache: Mutable mapping of stripped username -> alias; updated in place.
        username: Sender id to resolve; ``None``/blank yields "".

    Returns:
        The stripped alias, or "" when there is no connection, no matching
        row, an empty alias, or the lookups raised.
    """
    key = str(username or "").strip()
    if not key or conn is None:
        return ""
    if key in cache:
        return cache[key]

    alias = ""
    # Query each table under its own guard: previously both queries shared one
    # ``try``, so an error on ``contact`` (e.g. missing table/column) skipped
    # the ``stranger`` fallback entirely.
    for query in (
        "SELECT alias FROM contact WHERE username = ? LIMIT 1",
        "SELECT alias FROM stranger WHERE username = ? LIMIT 1",
    ):
        try:
            found = conn.execute(query, (key,)).fetchone()
            if found is not None and found[0] is not None:
                alias = str(found[0] or "").strip()
        except Exception:
            # Deliberately best-effort: a failing table counts as "no alias"
            # instead of raising into the caller.
            continue
        if alias:
            break

    cache[key] = alias
    return alias
def _scan_db_storage_mtime_ns(db_storage_dir: Path) -> int:
try:
base = str(db_storage_dir)
@@ -1386,6 +1413,16 @@ def _append_full_messages_from_rows(
resource_conn: Optional[sqlite3.Connection],
resource_chat_id: Optional[int],
) -> None:
contact_conn: Optional[sqlite3.Connection] = None
alias_cache: dict[str, str] = {}
if is_group:
try:
contact_db_path = account_dir / "contact.db"
if contact_db_path.exists():
contact_conn = sqlite3.connect(str(contact_db_path))
except Exception:
contact_conn = None
for r in rows:
local_id = int(r["local_id"] or 0)
create_time = int(r["create_time"] or 0)
@@ -1448,10 +1485,21 @@ def _append_full_messages_from_rows(
raw_text = raw_text.strip()
sender_prefix = ""
if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_alias = ""
sep = raw_text.find(":\n")
if sep > 0:
prefix = raw_text[:sep].strip()
if prefix and sender_username and prefix != sender_username:
strong_hint = prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix
if not strong_hint:
body_probe = raw_text[sep + 2 :].lstrip("\n").lstrip()
body_is_xml = body_probe.startswith("<") or body_probe.startswith('"<')
if not body_is_xml:
sender_alias = _lookup_contact_alias(contact_conn, alias_cache, sender_username)
sender_prefix, raw_text = _split_group_sender_prefix(raw_text, sender_username, sender_alias)
if is_group and sender_prefix:
if is_group and sender_prefix and (not sender_username):
sender_username = sender_prefix
if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')):
@@ -1812,6 +1860,12 @@ def _append_full_messages_from_rows(
}
)
if contact_conn is not None:
try:
contact_conn.close()
except Exception:
pass
def _postprocess_full_messages(
*,
@@ -2301,6 +2355,16 @@ def _collect_chat_messages(
pat_usernames: set[str] = set()
has_more_any = False
contact_conn: Optional[sqlite3.Connection] = None
alias_cache: dict[str, str] = {}
if is_group:
try:
contact_db_path = account_dir / "contact.db"
if contact_db_path.exists():
contact_conn = sqlite3.connect(str(contact_db_path))
except Exception:
contact_conn = None
for db_path in db_paths:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
@@ -2384,10 +2448,21 @@ def _collect_chat_messages(
raw_text = raw_text.strip()
sender_prefix = ""
if is_group and not raw_text.startswith("<") and not raw_text.startswith('"<'):
sender_prefix, raw_text = _split_group_sender_prefix(raw_text)
if is_group and raw_text and (not raw_text.startswith("<")) and (not raw_text.startswith('"<')):
sender_alias = ""
sep = raw_text.find(":\n")
if sep > 0:
prefix = raw_text[:sep].strip()
if prefix and sender_username and prefix != sender_username:
strong_hint = prefix.startswith("wxid_") or prefix.endswith("@chatroom") or "@" in prefix
if not strong_hint:
body_probe = raw_text[sep + 2 :].lstrip("\n").lstrip()
body_is_xml = body_probe.startswith("<") or body_probe.startswith('"<')
if not body_is_xml:
sender_alias = _lookup_contact_alias(contact_conn, alias_cache, sender_username)
sender_prefix, raw_text = _split_group_sender_prefix(raw_text, sender_username, sender_alias)
if is_group and sender_prefix:
if is_group and sender_prefix and (not sender_username):
sender_username = sender_prefix
if is_group and (raw_text.startswith("<") or raw_text.startswith('"<')):
@@ -2744,6 +2819,12 @@ def _collect_chat_messages(
finally:
conn.close()
if contact_conn is not None:
try:
contact_conn.close()
except Exception:
pass
return merged, has_more_any, sender_usernames, quote_usernames, pat_usernames