fix(chat): 链接卡片补全公众号来源并解决缩略图防盗链

- appmsg 解析补全 from/fromUsername,并规范化 url/thumbUrl
- contact.db 兜底反查 fromUsername(仅有 sourcedisplayname 时)
- 新增 /api/chat/media/proxy_image,仅允许 qpic/qlogo,带 mp.weixin.qq.com Referer(10MB 限制)
- 前端 LinkCard 增加来源头像/host 兜底,qpic/qlogo 预览图走代理;头像加载失败回退
- 导出消息补充 from 字段
This commit is contained in:
2977094657
2026-01-24 10:51:35 +08:00
parent d3d1c8dc7d
commit c523036a10
5 changed files with 510 additions and 34 deletions

View File

@@ -894,6 +894,7 @@ def _parse_message_for_export(
content_text = raw_text
title = ""
url = ""
from_name = ""
record_item = ""
image_md5 = ""
image_file_id = ""
@@ -934,6 +935,7 @@ def _parse_message_for_export(
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
from_name = str(parsed.get("from") or "")
record_item = str(parsed.get("recordItem") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
@@ -1162,6 +1164,7 @@ def _parse_message_for_export(
"content": content_text,
"title": title,
"url": url,
"from": from_name,
"recordItem": record_item,
"thumbUrl": thumb_url,
"imageMd5": image_md5,

View File

@@ -773,7 +773,21 @@ def _parse_app_message(text: str) -> dict[str, Any]:
app_type = 0
title = _extract_xml_tag_text(text, "title")
des = _extract_xml_tag_text(text, "des")
url = _extract_xml_tag_text(text, "url")
url = _normalize_xml_url(_extract_xml_tag_text(text, "url"))
# Some appmsg payloads (notably mp.weixin.qq.com link shares) include a "source" block:
# <sourceusername>gh_xxx</sourceusername>
# <sourcedisplayname>公众号名</sourcedisplayname>
# We'll surface that as `from` so the frontend can render the publisher line like WeChat.
source_display_name = (
_extract_xml_tag_text(text, "sourcedisplayname")
or _extract_xml_tag_text(text, "sourceDisplayName")
or _extract_xml_tag_text(text, "appname")
)
source_username = (
_extract_xml_tag_text(text, "sourceusername")
or _extract_xml_tag_text(text, "sourceUsername")
)
lower = text.lower()
@@ -794,13 +808,15 @@ def _parse_app_message(text: str) -> dict[str, Any]:
}
if app_type in (5, 68) and url:
thumb_url = _extract_xml_tag_text(text, "thumburl")
thumb_url = _normalize_xml_url(_extract_xml_tag_text(text, "thumburl"))
return {
"renderType": "link",
"content": des or title or "[链接]",
"title": title or des or "",
"url": url,
"thumbUrl": thumb_url or "",
"from": str(source_display_name or "").strip(),
"fromUsername": str(source_username or "").strip(),
}
if app_type in (6, 74):
@@ -1322,6 +1338,58 @@ def _load_contact_rows(contact_db_path: Path, usernames: list[str]) -> dict[str,
conn.close()
def _load_usernames_by_display_names(contact_db_path: Path, names: list[str]) -> dict[str, str]:
"""Best-effort mapping from display name -> username using contact.db.
Some appmsg/link payloads only provide `sourcedisplayname` (surfaced as `from`) but not
`sourceusername` (`fromUsername`). We use this mapping to recover `fromUsername` so the
frontend can render the publisher avatar via `/api/chat/avatar`.
"""
uniq = list(dict.fromkeys([str(n or "").strip() for n in names if str(n or "").strip()]))
if not uniq:
return {}
placeholders = ",".join(["?"] * len(uniq))
hits: dict[str, set[str]] = {}
conn = sqlite3.connect(str(contact_db_path))
conn.row_factory = sqlite3.Row
try:
def query_table(table: str) -> None:
for col in ("remark", "nick_name", "alias"):
sql = f"""
SELECT username, {col} AS display_name
FROM {table}
WHERE {col} IN ({placeholders})
"""
try:
rows = conn.execute(sql, uniq).fetchall()
except Exception:
rows = []
for r in rows:
try:
dn = str(r["display_name"] or "").strip()
u = str(r["username"] or "").strip()
except Exception:
continue
if not dn or not u:
continue
hits.setdefault(dn, set()).add(u)
query_table("contact")
query_table("stranger")
# Only return unambiguous mappings (display name -> exactly 1 username).
out: dict[str, str] = {}
for dn, users in hits.items():
if len(users) == 1:
out[dn] = next(iter(users))
return out
finally:
conn.close()
def _make_search_tokens(q: str) -> list[str]:
tokens = [t for t in re.split(r"\s+", str(q or "").strip()) if t]
if len(tokens) > 8:

View File

@@ -39,6 +39,7 @@ from ..chat_helpers import (
_make_snippet,
_match_tokens,
_load_contact_rows,
_load_usernames_by_display_names,
_load_latest_message_previews,
_lookup_resource_md5,
_normalize_xml_url,
@@ -1519,6 +1520,8 @@ def _append_full_messages_from_rows(
content_text = raw_text
title = ""
url = ""
from_name = ""
from_username = ""
record_item = ""
image_md5 = ""
emoji_md5 = ""
@@ -1561,6 +1564,8 @@ def _append_full_messages_from_rows(
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
from_name = str(parsed.get("from") or "")
from_username = str(parsed.get("fromUsername") or "")
record_item = str(parsed.get("recordItem") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
@@ -1781,6 +1786,7 @@ def _append_full_messages_from_rows(
amount = str(parsed.get("amount") or amount)
cover_url = str(parsed.get("coverUrl") or cover_url)
thumb_url = str(parsed.get("thumbUrl") or thumb_url)
from_name = str(parsed.get("from") or from_name)
file_size = str(parsed.get("size") or file_size)
pay_sub_type = str(parsed.get("paySubType") or pay_sub_type)
file_md5 = str(parsed.get("fileMd5") or file_md5)
@@ -1828,6 +1834,8 @@ def _append_full_messages_from_rows(
"content": content_text,
"title": title,
"url": url,
"from": from_name,
"fromUsername": from_username,
"recordItem": record_item,
"imageMd5": image_md5,
"imageFileId": image_file_id,
@@ -1949,13 +1957,42 @@ def _postprocess_full_messages(
is_sent = m.get("isSent", False)
m["transferStatus"] = "已收款" if is_sent else "已被接收"
# Some appmsg payloads provide only `from` (sourcedisplayname) but not `fromUsername` (sourceusername).
# Recover `fromUsername` via contact.db so the frontend can render the publisher avatar.
missing_from_names = [
str(m.get("from") or "").strip()
for m in merged
if str(m.get("renderType") or "").strip() == "link"
and str(m.get("from") or "").strip()
and not str(m.get("fromUsername") or "").strip()
]
if missing_from_names:
name_to_username = _load_usernames_by_display_names(contact_db_path, missing_from_names)
if name_to_username:
for m in merged:
if str(m.get("fromUsername") or "").strip():
continue
if str(m.get("renderType") or "").strip() != "link":
continue
fn = str(m.get("from") or "").strip()
if fn and fn in name_to_username:
m["fromUsername"] = name_to_username[fn]
from_usernames = [str(m.get("fromUsername") or "").strip() for m in merged]
uniq_senders = list(
dict.fromkeys([u for u in (sender_usernames + list(pat_usernames) + quote_usernames) if u])
dict.fromkeys([u for u in (sender_usernames + list(pat_usernames) + quote_usernames + from_usernames) if u])
)
sender_contact_rows = _load_contact_rows(contact_db_path, uniq_senders)
local_sender_avatars = _query_head_image_usernames(head_image_db_path, uniq_senders)
for m in merged:
# If appmsg doesn't provide sourcedisplayname, try mapping sourceusername to display name.
if (not str(m.get("from") or "").strip()) and str(m.get("fromUsername") or "").strip():
fu = str(m.get("fromUsername") or "").strip()
frow = sender_contact_rows.get(fu)
if frow is not None:
m["from"] = _pick_display_name(frow, fu)
su = str(m.get("senderUsername") or "")
if not su:
continue
@@ -2479,6 +2516,8 @@ def _collect_chat_messages(
content_text = raw_text
title = ""
url = ""
from_name = ""
from_username = ""
record_item = ""
image_md5 = ""
emoji_md5 = ""
@@ -2523,6 +2562,8 @@ def _collect_chat_messages(
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
from_name = str(parsed.get("from") or "")
from_username = str(parsed.get("fromUsername") or "")
record_item = str(parsed.get("recordItem") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
@@ -2725,6 +2766,7 @@ def _collect_chat_messages(
content_text = str(parsed.get("content") or content_text)
title = str(parsed.get("title") or title)
url = str(parsed.get("url") or url)
from_name = str(parsed.get("from") or from_name)
record_item = str(parsed.get("recordItem") or record_item)
quote_title = str(parsed.get("quoteTitle") or quote_title)
quote_content = str(parsed.get("quoteContent") or quote_content)
@@ -2785,6 +2827,8 @@ def _collect_chat_messages(
"content": content_text,
"title": title,
"url": url,
"from": from_name,
"fromUsername": from_username,
"recordItem": record_item,
"imageMd5": image_md5,
"imageFileId": image_file_id,
@@ -3124,6 +3168,8 @@ async def list_chat_messages(
content_text = raw_text
title = ""
url = ""
from_name = ""
from_username = ""
record_item = ""
image_md5 = ""
emoji_md5 = ""
@@ -3168,6 +3214,8 @@ async def list_chat_messages(
content_text = str(parsed.get("content") or "")
title = str(parsed.get("title") or "")
url = str(parsed.get("url") or "")
from_name = str(parsed.get("from") or "")
from_username = str(parsed.get("fromUsername") or "")
record_item = str(parsed.get("recordItem") or "")
quote_title = str(parsed.get("quoteTitle") or "")
quote_content = str(parsed.get("quoteContent") or "")
@@ -3366,6 +3414,7 @@ async def list_chat_messages(
content_text = str(parsed.get("content") or content_text)
title = str(parsed.get("title") or title)
url = str(parsed.get("url") or url)
from_name = str(parsed.get("from") or from_name)
record_item = str(parsed.get("recordItem") or record_item)
quote_title = str(parsed.get("quoteTitle") or quote_title)
quote_content = str(parsed.get("quoteContent") or quote_content)
@@ -3419,6 +3468,8 @@ async def list_chat_messages(
"content": content_text,
"title": title,
"url": url,
"from": from_name,
"fromUsername": from_username,
"recordItem": record_item,
"imageMd5": image_md5,
"imageFileId": image_file_id,
@@ -3546,15 +3597,44 @@ async def list_chat_messages(
is_sent = m.get("isSent", False)
m["transferStatus"] = "已收款" if is_sent else "已被接收"
# Some appmsg payloads provide only `from` (sourcedisplayname) but not `fromUsername` (sourceusername).
# Recover `fromUsername` via contact.db so the frontend can render the publisher avatar.
missing_from_names = [
str(m.get("from") or "").strip()
for m in merged
if str(m.get("renderType") or "").strip() == "link"
and str(m.get("from") or "").strip()
and not str(m.get("fromUsername") or "").strip()
]
if missing_from_names:
name_to_username = _load_usernames_by_display_names(contact_db_path, missing_from_names)
if name_to_username:
for m in merged:
if str(m.get("fromUsername") or "").strip():
continue
if str(m.get("renderType") or "").strip() != "link":
continue
fn = str(m.get("from") or "").strip()
if fn and fn in name_to_username:
m["fromUsername"] = name_to_username[fn]
from_usernames = [str(m.get("fromUsername") or "").strip() for m in merged]
uniq_senders = list(
dict.fromkeys(
[u for u in (sender_usernames + list(pat_usernames) + quote_usernames) if u]
[u for u in (sender_usernames + list(pat_usernames) + quote_usernames + from_usernames) if u]
)
)
sender_contact_rows = _load_contact_rows(contact_db_path, uniq_senders)
local_sender_avatars = _query_head_image_usernames(head_image_db_path, uniq_senders)
for m in merged:
# If appmsg doesn't provide sourcedisplayname, try mapping sourceusername to display name.
if (not str(m.get("from") or "").strip()) and str(m.get("fromUsername") or "").strip():
fu = str(m.get("fromUsername") or "").strip()
frow = sender_contact_rows.get(fu)
if frow is not None:
m["from"] = _pick_display_name(frow, fu)
su = str(m.get("senderUsername") or "")
if not su:
continue

View File

@@ -408,6 +408,91 @@ def _detect_media_type_and_ext(data: bytes) -> tuple[bytes, str, str]:
return payload, media_type, ext
def _is_allowed_proxy_image_host(host: str) -> bool:
"""Allowlist hosts for proxying images to avoid turning this into a general SSRF gadget."""
h = str(host or "").strip().lower()
if not h:
return False
# WeChat public account/article thumbnails and avatars commonly live on these CDNs.
return h.endswith(".qpic.cn") or h.endswith(".qlogo.cn")
@router.get("/api/chat/media/proxy_image", summary="代理获取远程图片(解决微信公众号图片防盗链)")
async def proxy_image(url: str):
    """Fetch a remote image server-side and relay it, working around anti-hotlink checks.

    The URL is HTML-unescaped, must pass `_is_safe_http_url`, and its host must pass
    `_is_allowed_proxy_image_host`. The download sends a `mp.weixin.qq.com` Referer/Origin
    and is capped at 10MB. Redirects are followed manually (at most a few hops) and every
    hop is re-validated, so an allowlisted host cannot bounce the request to an arbitrary
    target.

    Args:
        url: Absolute http/https URL of the image to fetch.

    Returns:
        A `Response` carrying the image bytes, with a one-day public `Cache-Control`.

    Raises:
        HTTPException: 400 for a missing/invalid/unsupported URL or an oversized body;
            502 when the upstream fetch fails, redirects somewhere not allowed, returns
            an empty body, or does not return an image.
    """
    # Local import: only `urlparse` is known to be available at module scope.
    from urllib.parse import urljoin

    u = html.unescape(str(url or "")).strip()
    if not u:
        raise HTTPException(status_code=400, detail="Missing url.")

    if not _is_safe_http_url(u):
        raise HTTPException(status_code=400, detail="Invalid url (only public http/https allowed).")

    try:
        # `.hostname` can itself raise ValueError on malformed netlocs, so keep it guarded.
        host = (urlparse(u).hostname or "").strip().lower()
    except ValueError as e:
        raise HTTPException(status_code=400, detail="Invalid url.") from e

    if not _is_allowed_proxy_image_host(host):
        raise HTTPException(status_code=400, detail="Unsupported url host for proxy_image.")

    max_bytes = 10 * 1024 * 1024
    max_redirects = 3

    def _redirect_target_allowed(target: str) -> bool:
        """Apply the same URL and host checks to a redirect hop as to the original URL."""
        if not _is_safe_http_url(target):
            return False
        try:
            target_host = (urlparse(target).hostname or "").strip().lower()
        except ValueError:
            return False
        return _is_allowed_proxy_image_host(target_host)

    def _download_bytes() -> tuple[bytes, str]:
        """Blocking download (run in a worker thread); returns (body, upstream Content-Type)."""
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
            "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
            # qpic/qlogo often require a mp.weixin.qq.com referer (anti-hotlink)
            "Referer": "https://mp.weixin.qq.com/",
            "Origin": "https://mp.weixin.qq.com",
        }
        current = u
        for _ in range(max_redirects + 1):
            # Automatic redirects are disabled so each hop can be checked against the allowlist.
            r = requests.get(current, headers=headers, timeout=20, stream=True, allow_redirects=False)
            try:
                if r.is_redirect:
                    target = urljoin(current, str(r.headers.get("Location") or "").strip())
                    if not _redirect_target_allowed(target):
                        raise HTTPException(status_code=502, detail="Proxy redirect target not allowed.")
                    current = target
                    continue

                r.raise_for_status()
                content_type = str(r.headers.get("Content-Type") or "").strip()
                chunks: list[bytes] = []
                total = 0
                for ch in r.iter_content(chunk_size=64 * 1024):
                    if not ch:
                        continue
                    chunks.append(ch)
                    total += len(ch)
                    if total > max_bytes:
                        raise HTTPException(status_code=400, detail="Proxy image too large (>10MB).")
                return b"".join(chunks), content_type
            finally:
                try:
                    r.close()
                except Exception:
                    pass
        raise HTTPException(status_code=502, detail="Proxy image failed: too many redirects.")

    try:
        data, ct = await asyncio.to_thread(_download_bytes)
    except HTTPException:
        raise
    except Exception as e:
        logger.warning(f"proxy_image failed: url={u} err={e}")
        raise HTTPException(status_code=502, detail=f"Proxy image failed: {e}") from e

    if not data:
        raise HTTPException(status_code=502, detail="Proxy returned empty body.")

    payload, media_type, _ext = _detect_media_type_and_ext(data)

    # Prefer upstream Content-Type when it looks like an image (sniffing may fail for some formats).
    if media_type == "application/octet-stream" and ct:
        # Media types are case-insensitive; drop any parameters such as "; charset=...".
        mt = ct.split(";")[0].strip().lower()
        if mt.startswith("image/"):
            media_type = mt

    if not str(media_type or "").startswith("image/"):
        raise HTTPException(status_code=502, detail="Proxy did not return an image.")

    resp = Response(content=payload, media_type=media_type)
    resp.headers["Cache-Control"] = "public, max-age=86400"
    return resp
@router.post("/api/chat/media/emoji/download", summary="下载表情消息资源到本地 resource")
async def download_chat_emoji(req: EmojiDownloadRequest):
md5 = str(req.md5 or "").strip().lower()