mirror of
https://github.com/LifeArchiveProject/WeChatDataAnalysis.git
synced 2026-02-19 14:20:51 +08:00
feat(contacts): 联系人支持拼音分组并补充性别/签名
- 解析 extra_buffer 补齐 gender/signature
- 返回 pinyinKey/pinyinInitial,前端按 A-Z/# 分组排序展示
- tests: 更新联系人导出用例覆盖新增字段
This commit is contained in:
@@ -46,8 +46,12 @@
|
|||||||
<div v-else-if="error" class="p-4 text-sm text-red-500 whitespace-pre-wrap">{{ error }}</div>
|
<div v-else-if="error" class="p-4 text-sm text-red-500 whitespace-pre-wrap">{{ error }}</div>
|
||||||
<div v-else-if="contacts.length === 0" class="p-4 text-sm text-gray-500">暂无联系人</div>
|
<div v-else-if="contacts.length === 0" class="p-4 text-sm text-gray-500">暂无联系人</div>
|
||||||
<div v-else>
|
<div v-else>
|
||||||
|
<div v-for="group in groupedContacts" :key="group.key">
|
||||||
|
<div class="px-3 py-1 text-xs font-semibold text-gray-500 bg-gray-50 border-b border-gray-100">
|
||||||
|
{{ group.key }}
|
||||||
|
</div>
|
||||||
<div
|
<div
|
||||||
v-for="contact in contacts"
|
v-for="contact in group.items"
|
||||||
:key="contact.username"
|
:key="contact.username"
|
||||||
class="px-3 py-2 border-b border-gray-100 flex items-center gap-3"
|
class="px-3 py-2 border-b border-gray-100 flex items-center gap-3"
|
||||||
>
|
>
|
||||||
@@ -74,6 +78,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="bg-white border border-gray-200 rounded-lg p-4 flex flex-col gap-3">
|
<div class="bg-white border border-gray-200 rounded-lg p-4 flex flex-col gap-3">
|
||||||
<div>
|
<div>
|
||||||
@@ -184,6 +189,54 @@ const typeBadgeClass = (type) => {
|
|||||||
return 'bg-gray-100 text-gray-600'
|
return 'bg-gray-100 text-gray-600'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map an arbitrary value to a contact-list section key: a single uppercase
// letter A-Z, or '#' for everything else (empty, multi-character, digits,
// symbols, non-Latin characters).
const normalizeContactGroupKey = (value) => {
  const candidate = String(value || '').trim().toUpperCase()
  return /^[A-Z]$/.test(candidate) ? candidate : '#'
}
|
||||||
|
|
||||||
|
// Pick the string used to order a contact inside its section: the first
// non-empty value among pinyinKey, displayName and username, trimmed and
// lower-cased. Returns '' when none of them is usable.
const buildContactSortKey = (contact) => {
  const toKey = (raw) => String(raw || '').trim().toLowerCase()
  const candidates = [contact?.pinyinKey, contact?.displayName, contact?.username]
  return candidates.map(toKey).find(Boolean) ?? ''
}
|
||||||
|
|
||||||
|
// Contacts arranged for sectioned display. Produces an array of
// { key, items } buckets where key is the normalized pinyin initial.
// Letter buckets come first in localeCompare order and '#' is always last;
// inside a bucket contacts are ordered by sort key, then by username.
const groupedContacts = computed(() => {
  const source = Array.isArray(contacts.value) ? contacts.value : []

  // Precompute every comparison key once so the comparator stays cheap.
  const decorated = source.map((contact) => ({
    contact,
    groupKey: normalizeContactGroupKey(contact?.pinyinInitial),
    sortKey: buildContactSortKey(contact),
    usernameKey: String(contact?.username || '').trim().toLowerCase(),
  }))

  const compareRows = (left, right) => {
    if (left.groupKey === right.groupKey) {
      // Same section: sort key first, username breaks ties.
      return (
        left.sortKey.localeCompare(right.sortKey) ||
        left.usernameKey.localeCompare(right.usernameKey)
      )
    }
    // Different sections: '#' sinks to the bottom, letters sort normally.
    if (left.groupKey === '#') return 1
    if (right.groupKey === '#') return -1
    return left.groupKey.localeCompare(right.groupKey)
  }
  decorated.sort(compareRows)

  // Rows are now contiguous per section, so start a new bucket whenever the
  // section key changes.
  return decorated.reduce((buckets, { groupKey, contact }) => {
    const tail = buckets[buckets.length - 1]
    if (tail && tail.key === groupKey) {
      tail.items.push(contact)
    } else {
      buckets.push({ key: groupKey, items: [contact] })
    }
    return buckets
  }, [])
})
|
||||||
|
|
||||||
// True only when running on the client and the host exposes
// window.wechatDesktop.chooseDirectory.
const isDesktopExportRuntime = () => {
  if (!process.client) return false
  return Boolean(window?.wechatDesktop?.chooseDirectory)
}
|
||||||
|
|||||||
@@ -3,10 +3,12 @@ import json
|
|||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Request
|
from fastapi import APIRouter, HTTPException, Request
|
||||||
|
from pypinyin import Style, lazy_pinyin
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from ..chat_helpers import (
|
from ..chat_helpers import (
|
||||||
@@ -96,6 +98,76 @@ def _to_optional_int(v: Any) -> Optional[int]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Matches runs of anything that is not a lowercase ASCII letter or digit;
# used to strip such characters out of pinyin sort keys.
_PINYIN_CLEAN_RE = re.compile(r"[^a-z0-9]+")
# Matches a single ASCII letter; used to pick the initial out of a syllable.
_PINYIN_ALPHA_RE = re.compile(r"[A-Za-z]")

# Polyphonic surnames: pypinyin's default reading for a single character is
# not necessarily the surname reading (for example 曾 = ceng / zeng).
# When the character is the first character of a name, prefer the common
# surname reading; this is used for grouping/sorting the contact list.
_SURNAME_PINYIN_OVERRIDES: dict[str, str] = {
    "曾": "zeng",
    "区": "ou",
    "仇": "qiu",
    "解": "xie",
    "单": "shan",
    "查": "zha",
    "乐": "yue",
    "朴": "piao",
    "盖": "ge",
    "缪": "miao",
}
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=4096)
def _build_contact_pinyin_key(name: str) -> str:
    """Return the lowercase ASCII sort key for a contact name.

    The name is converted with ``lazy_pinyin`` and every resulting part is
    lower-cased and reduced to ``[a-z0-9]`` before the parts are joined.
    If the first character has an entry in ``_SURNAME_PINYIN_OVERRIDES``,
    that reading replaces the library's reading for that character.

    Args:
        name: Contact name; it is passed through ``_normalize_text`` first.

    Returns:
        The concatenated key, or ``""`` when the name is empty or contains
        nothing that survives the ``[a-z0-9]`` filter.
    """
    # Strip leading whitespace so the surname override inspects the same
    # first character as ``_build_contact_pinyin_initial``; otherwise the
    # group initial and the sort key could disagree for the same name.
    text = _normalize_text(name).lstrip()
    if not text:
        return ""

    first, rest = text[0], text[1:]
    override = _SURNAME_PINYIN_OVERRIDES.get(first)
    if override:
        parts = [override]
        if rest:
            parts.extend(lazy_pinyin(rest, style=Style.NORMAL, errors="default"))
    else:
        # Keep non-CJK segments so English names can be sorted/grouped as expected.
        parts = lazy_pinyin(text, style=Style.NORMAL, errors="default")

    # Parts that clean down to "" contribute nothing to the joined key.
    return "".join(
        _PINYIN_CLEAN_RE.sub("", _normalize_text(part).lower()) for part in parts
    )
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=4096)
def _build_contact_pinyin_initial(name: str) -> str:
    """Return the section letter (``A``-``Z``) for a contact name, or ``#``.

    Only the first character after leading whitespace is considered:
    an ASCII letter is returned upper-cased; a character listed in
    ``_SURNAME_PINYIN_OVERRIDES`` yields the first letter of that reading;
    otherwise the first ASCII letter of the ``lazy_pinyin`` result is used.
    Anything else (empty name, emoji, digits, symbols) yields ``#``.
    """
    stripped = _normalize_text(name).lstrip()
    head = stripped[:1]
    if not head:
        return "#"

    # Plain ASCII letters need no conversion.
    if head.isascii() and head.isalpha():
        return head.upper()

    surname_reading = _SURNAME_PINYIN_OVERRIDES.get(head)
    if surname_reading:
        return surname_reading[0].upper()

    # For CJK, try to convert the first character to its pinyin initial.
    syllables = lazy_pinyin(head, style=Style.NORMAL, errors="ignore")
    letter = _PINYIN_ALPHA_RE.search(syllables[0]) if syllables else None
    return letter.group(0).upper() if letter else "#"
|
||||||
|
|
||||||
|
|
||||||
def _decode_varint(raw: bytes, offset: int) -> tuple[Optional[int], int]:
|
def _decode_varint(raw: bytes, offset: int) -> tuple[Optional[int], int]:
|
||||||
value = 0
|
value = 0
|
||||||
shift = 0
|
shift = 0
|
||||||
@@ -125,6 +197,7 @@ def _decode_proto_text(raw: bytes) -> str:
|
|||||||
|
|
||||||
def _parse_contact_extra_buffer(extra_buffer: Any) -> dict[str, Any]:
|
def _parse_contact_extra_buffer(extra_buffer: Any) -> dict[str, Any]:
|
||||||
out = {
|
out = {
|
||||||
|
"gender": 0,
|
||||||
"signature": "",
|
"signature": "",
|
||||||
"country": "",
|
"country": "",
|
||||||
"province": "",
|
"province": "",
|
||||||
@@ -160,6 +233,9 @@ def _parse_contact_extra_buffer(extra_buffer: Any) -> dict[str, Any]:
|
|||||||
if val is None:
|
if val is None:
|
||||||
break
|
break
|
||||||
idx = idx_next
|
idx = idx_next
|
||||||
|
if field_no == 2:
|
||||||
|
# 性别: 1=男, 2=女, 0=未知
|
||||||
|
out["gender"] = int(val)
|
||||||
if field_no == 8:
|
if field_no == 8:
|
||||||
out["source_scene"] = int(val)
|
out["source_scene"] = int(val)
|
||||||
continue
|
continue
|
||||||
@@ -327,6 +403,8 @@ def _load_contact_rows_map(contact_db_path: Path) -> dict[str, dict[str, Any]]:
|
|||||||
"verify_flag": _to_int(row["verify_flag"] if "verify_flag" in row.keys() else 0),
|
"verify_flag": _to_int(row["verify_flag"] if "verify_flag" in row.keys() else 0),
|
||||||
"big_head_url": _normalize_text(row["big_head_url"] if "big_head_url" in row.keys() else ""),
|
"big_head_url": _normalize_text(row["big_head_url"] if "big_head_url" in row.keys() else ""),
|
||||||
"small_head_url": _normalize_text(row["small_head_url"] if "small_head_url" in row.keys() else ""),
|
"small_head_url": _normalize_text(row["small_head_url"] if "small_head_url" in row.keys() else ""),
|
||||||
|
"gender": _to_int(extra_info.get("gender")),
|
||||||
|
"signature": _normalize_text(extra_info.get("signature")),
|
||||||
"country": _normalize_text(extra_info.get("country")),
|
"country": _normalize_text(extra_info.get("country")),
|
||||||
"province": _normalize_text(extra_info.get("province")),
|
"province": _normalize_text(extra_info.get("province")),
|
||||||
"city": _normalize_text(extra_info.get("city")),
|
"city": _normalize_text(extra_info.get("city")),
|
||||||
@@ -481,6 +559,8 @@ def _collect_contacts_for_account(
|
|||||||
province = _normalize_text(row.get("province"))
|
province = _normalize_text(row.get("province"))
|
||||||
city = _normalize_text(row.get("city"))
|
city = _normalize_text(row.get("city"))
|
||||||
source_scene = _to_optional_int(row.get("source_scene"))
|
source_scene = _to_optional_int(row.get("source_scene"))
|
||||||
|
gender = _to_int(row.get("gender"))
|
||||||
|
signature = _normalize_text(row.get("signature"))
|
||||||
|
|
||||||
item = {
|
item = {
|
||||||
"username": username,
|
"username": username,
|
||||||
@@ -488,6 +568,8 @@ def _collect_contacts_for_account(
|
|||||||
"remark": _normalize_text(row.get("remark")),
|
"remark": _normalize_text(row.get("remark")),
|
||||||
"nickname": _normalize_text(row.get("nick_name")),
|
"nickname": _normalize_text(row.get("nick_name")),
|
||||||
"alias": _normalize_text(row.get("alias")),
|
"alias": _normalize_text(row.get("alias")),
|
||||||
|
"gender": gender,
|
||||||
|
"signature": signature,
|
||||||
"type": contact_type,
|
"type": contact_type,
|
||||||
"country": country,
|
"country": country,
|
||||||
"province": province,
|
"province": province,
|
||||||
@@ -520,6 +602,8 @@ def _collect_contacts_for_account(
|
|||||||
"remark": "",
|
"remark": "",
|
||||||
"nickname": "",
|
"nickname": "",
|
||||||
"alias": "",
|
"alias": "",
|
||||||
|
"gender": 0,
|
||||||
|
"signature": "",
|
||||||
"type": "group",
|
"type": "group",
|
||||||
"country": "",
|
"country": "",
|
||||||
"province": "",
|
"province": "",
|
||||||
@@ -545,6 +629,9 @@ def _collect_contacts_for_account(
|
|||||||
)
|
)
|
||||||
for item in contacts:
|
for item in contacts:
|
||||||
item.pop("_sortTs", None)
|
item.pop("_sortTs", None)
|
||||||
|
name_for_pinyin = _normalize_text(item.get("displayName")) or _normalize_text(item.get("username"))
|
||||||
|
item["pinyinKey"] = _build_contact_pinyin_key(name_for_pinyin)
|
||||||
|
item["pinyinInitial"] = _build_contact_pinyin_initial(name_for_pinyin)
|
||||||
return contacts
|
return contacts
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -39,9 +39,20 @@ class TestContactsExport(unittest.TestCase):
|
|||||||
return cls._encode_varint(tag) + cls._encode_varint(int(value))
|
return cls._encode_varint(tag) + cls._encode_varint(int(value))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_extra_buffer(cls, *, country: str, province: str, city: str, source_scene: int) -> bytes:
|
def _build_extra_buffer(
|
||||||
|
cls,
|
||||||
|
*,
|
||||||
|
country: str,
|
||||||
|
province: str,
|
||||||
|
city: str,
|
||||||
|
source_scene: int,
|
||||||
|
gender: int = 0,
|
||||||
|
signature: str = "",
|
||||||
|
) -> bytes:
|
||||||
return b"".join(
|
return b"".join(
|
||||||
[
|
[
|
||||||
|
cls._encode_field_varint(2, gender),
|
||||||
|
cls._encode_field_len(4, signature.encode("utf-8")),
|
||||||
cls._encode_field_len(5, country.encode("utf-8")),
|
cls._encode_field_len(5, country.encode("utf-8")),
|
||||||
cls._encode_field_len(6, province.encode("utf-8")),
|
cls._encode_field_len(6, province.encode("utf-8")),
|
||||||
cls._encode_field_len(7, city.encode("utf-8")),
|
cls._encode_field_len(7, city.encode("utf-8")),
|
||||||
@@ -88,6 +99,8 @@ class TestContactsExport(unittest.TestCase):
|
|||||||
province="Sichuan",
|
province="Sichuan",
|
||||||
city="Chengdu",
|
city="Chengdu",
|
||||||
source_scene=14,
|
source_scene=14,
|
||||||
|
gender=1,
|
||||||
|
signature="自助者天助!!!",
|
||||||
)
|
)
|
||||||
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
@@ -320,6 +333,8 @@ class TestContactsExport(unittest.TestCase):
|
|||||||
self.assertEqual(friend_contact.get("province"), "Sichuan")
|
self.assertEqual(friend_contact.get("province"), "Sichuan")
|
||||||
self.assertEqual(friend_contact.get("city"), "Chengdu")
|
self.assertEqual(friend_contact.get("city"), "Chengdu")
|
||||||
self.assertEqual(friend_contact.get("region"), "中国大陆·Sichuan·Chengdu")
|
self.assertEqual(friend_contact.get("region"), "中国大陆·Sichuan·Chengdu")
|
||||||
|
self.assertEqual(friend_contact.get("gender"), 1)
|
||||||
|
self.assertEqual(friend_contact.get("signature"), "自助者天助!!!")
|
||||||
self.assertEqual(friend_contact.get("sourceScene"), 14)
|
self.assertEqual(friend_contact.get("sourceScene"), 14)
|
||||||
self.assertEqual(friend_contact.get("source"), "通过群聊添加")
|
self.assertEqual(friend_contact.get("source"), "通过群聊添加")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user