mirror of
https://github.com/LifeArchiveProject/WeChatDataAnalysis.git
synced 2026-02-19 14:20:51 +08:00
feat: parse some other media like article
This commit is contained in:
@@ -47,31 +47,81 @@
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="post.contentDesc"
|
||||
class="mt-1 text-sm text-gray-900 leading-6 whitespace-pre-wrap break-words"
|
||||
:class="{ 'privacy-blur': privacyMode }"
|
||||
v-if="post.contentDesc"
|
||||
class="mt-1 text-sm text-gray-900 leading-6 whitespace-pre-wrap break-words"
|
||||
:class="{ 'privacy-blur': privacyMode }"
|
||||
>
|
||||
{{ post.contentDesc }}
|
||||
</div>
|
||||
|
||||
<div v-if="post.media && post.media.length > 0" class="mt-2" :class="{ 'privacy-blur': privacyMode }">
|
||||
<div v-if="post.type === 3" class="mt-2 max-w-[360px]" :class="{ 'privacy-blur': privacyMode }">
|
||||
<a :href="post.contentUrl" target="_blank" class="block bg-gray-100 p-2 rounded-sm border border-gray-200 no-underline hover:bg-gray-200 transition-colors">
|
||||
<div class="flex items-center gap-3">
|
||||
<img
|
||||
v-if="post.contentUrl && !hasArticleThumbError(post.id)"
|
||||
:src="getArticleThumbProxyUrl(post.contentUrl)"
|
||||
class="w-12 h-12 object-cover flex-shrink-0 bg-white"
|
||||
alt=""
|
||||
@error="onArticleThumbError(post.id)"
|
||||
/>
|
||||
<div v-else class="w-12 h-12 flex items-center justify-center bg-gray-200 text-gray-400 flex-shrink-0 text-xs">
|
||||
文章
|
||||
</div>
|
||||
|
||||
<div class="flex-1 flex flex-col justify-between overflow-hidden h-12">
|
||||
<div class="text-[13px] text-gray-900 leading-tight line-clamp-2">{{ post.title }}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="text-[11px] text-[#576b95] mt-1 pt-1 border-t border-gray-200/50">
|
||||
公众号文章分享
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div v-else-if="post.type === 28 && post.finderFeed && Object.keys(post.finderFeed).length > 0" class="mt-2 max-w-[360px]" :class="{ 'privacy-blur': privacyMode }">
|
||||
<div class="block bg-gray-100 p-2 rounded-sm border border-gray-200 no-underline hover:bg-gray-200 transition-colors">
|
||||
<!-- 浏览器没有看微信视频号的环境,暂时不进行跳转!!-->
|
||||
<div class="flex items-start gap-3">
|
||||
<div class="relative w-14 h-16 flex-shrink-0 bg-black overflow-hidden rounded-sm">
|
||||
<img
|
||||
v-if="post.finderFeed.thumbUrl"
|
||||
:src="getProxyExternalUrl(post.finderFeed.thumbUrl)"
|
||||
class="w-full h-full object-cover opacity-80"
|
||||
alt="finder cover"
|
||||
/>
|
||||
<div class="absolute inset-0 flex items-center justify-center pointer-events-none">
|
||||
<svg class="w-5 h-5 text-white/90" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex-1 flex flex-col overflow-hidden">
|
||||
<div class="text-xs text-gray-500 truncate">{{ post.finderFeed.nickname }}</div>
|
||||
<div class="text-[13px] text-gray-900 leading-tight line-clamp-2 mt-[2px]">{{ post.finderFeed.desc || post.title }}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="text-[11px] text-[#576b95] mt-1 pt-1 border-t border-gray-200/50">
|
||||
视频号 · 动态
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-else-if="post.media && post.media.length > 0" class="mt-2" :class="{ 'privacy-blur': privacyMode }">
|
||||
<div v-if="post.media.length === 1" class="max-w-[360px]">
|
||||
<div
|
||||
v-if="!hasMediaError(post.id, 0) && getMediaThumbSrc(post, post.media[0], 0)"
|
||||
class="inline-block cursor-pointer relative"
|
||||
@click.stop="onMediaClick(post, post.media[0], 0)"
|
||||
v-if="!hasMediaError(post.id, 0) && getMediaThumbSrc(post, post.media[0], 0)"
|
||||
class="inline-block cursor-pointer relative"
|
||||
@click.stop="onMediaClick(post, post.media[0], 0)"
|
||||
>
|
||||
<img
|
||||
:src="getMediaThumbSrc(post, post.media[0], 0)"
|
||||
class="rounded-sm max-h-[360px] object-cover"
|
||||
alt=""
|
||||
loading="lazy"
|
||||
referrerpolicy="no-referrer"
|
||||
@error="onMediaError(post.id, 0)"
|
||||
:src="getMediaThumbSrc(post, post.media[0], 0)"
|
||||
class="rounded-sm max-h-[360px] object-cover"
|
||||
alt=""
|
||||
loading="lazy"
|
||||
referrerpolicy="no-referrer"
|
||||
@error="onMediaError(post.id, 0)"
|
||||
/>
|
||||
<div
|
||||
v-if="Number(post.media[0]?.type || 0) === 6"
|
||||
class="absolute inset-0 flex items-center justify-center pointer-events-none"
|
||||
v-if="Number(post.media[0]?.type || 0) === 6"
|
||||
class="absolute inset-0 flex items-center justify-center pointer-events-none"
|
||||
>
|
||||
<div class="w-12 h-12 rounded-full bg-black/45 flex items-center justify-center">
|
||||
<svg class="w-6 h-6 text-white" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>
|
||||
@@ -79,11 +129,11 @@
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
v-else
|
||||
class="w-[240px] h-[180px] rounded-sm bg-gray-100 border border-gray-200 flex items-center justify-center text-xs text-gray-400"
|
||||
title="图片加载失败"
|
||||
@click.stop="onMediaClick(post, post.media[0], 0)"
|
||||
style="cursor: pointer;"
|
||||
v-else
|
||||
class="w-[240px] h-[180px] rounded-sm bg-gray-100 border border-gray-200 flex items-center justify-center text-xs text-gray-400"
|
||||
title="图片加载失败"
|
||||
@click.stop="onMediaClick(post, post.media[0], 0)"
|
||||
style="cursor: pointer;"
|
||||
>
|
||||
图片加载失败
|
||||
</div>
|
||||
@@ -91,23 +141,22 @@
|
||||
|
||||
<div v-else class="grid grid-cols-3 gap-1 max-w-[360px]">
|
||||
<div
|
||||
v-for="(m, idx) in post.media.slice(0, 9)"
|
||||
:key="idx"
|
||||
class="w-[116px] h-[116px] rounded-[2px] overflow-hidden bg-gray-100 border border-gray-200 flex items-center justify-center cursor-pointer relative"
|
||||
@click.stop="onMediaClick(post, m, idx)"
|
||||
v-for="(m, idx) in post.media.slice(0, 9)"
|
||||
:key="idx"
|
||||
class="w-[116px] h-[116px] rounded-[2px] overflow-hidden bg-gray-100 border border-gray-200 flex items-center justify-center cursor-pointer relative"
|
||||
@click.stop="onMediaClick(post, m, idx)"
|
||||
>
|
||||
<img
|
||||
v-if="!hasMediaError(post.id, idx) && getMediaThumbSrc(post, m, idx)"
|
||||
:src="getMediaThumbSrc(post, m, idx)"
|
||||
class="w-full h-full object-cover"
|
||||
alt=""
|
||||
loading="lazy"
|
||||
referrerpolicy="no-referrer"
|
||||
@error="onMediaError(post.id, idx)"
|
||||
v-if="!hasMediaError(post.id, idx) && getMediaThumbSrc(post, m, idx)"
|
||||
:src="getMediaThumbSrc(post, m, idx)"
|
||||
class="w-full h-full object-cover"
|
||||
alt=""
|
||||
loading="lazy"
|
||||
referrerpolicy="no-referrer"
|
||||
@error="onMediaError(post.id, idx)"
|
||||
/>
|
||||
<span v-else class="text-[10px] text-gray-400">图片失败</span>
|
||||
|
||||
<!-- 视频缩略图的播放提示 -->
|
||||
<div v-if="Number(m?.type || 0) === 6" class="absolute inset-0 flex items-center justify-center pointer-events-none">
|
||||
<div class="w-10 h-10 rounded-full bg-black/45 flex items-center justify-center">
|
||||
<svg class="w-5 h-5 text-white" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>
|
||||
@@ -422,6 +471,21 @@ const onMediaError = (postId, idx) => {
|
||||
mediaErrors.value[mediaErrorKey(postId, idx)] = true
|
||||
}
|
||||
|
||||
// Map of post id -> true for article cards whose thumbnail <img> failed to load.
// The template checks it to switch from the <img> to the text placeholder.
const articleThumbErrors = ref({})

// Report whether the article thumbnail of `postId` has already failed.
const hasArticleThumbError = (postId) => Boolean(articleThumbErrors.value[postId])

// <img @error> handler: remember the failure for `postId`.
const onArticleThumbError = (postId) => {
  const failed = articleThumbErrors.value
  failed[postId] = true
}
|
||||
|
||||
// (原有的函数保持不变)
|
||||
// Build the backend URL that extracts and proxies the cover image of the
// article at `contentUrl`. Returns '' when `contentUrl` is empty or blank.
const getArticleThumbProxyUrl = (contentUrl) => {
  const target = String(contentUrl || '').trim()
  return target
    ? `${mediaBase}/api/sns/article_thumb?url=${encodeURIComponent(target)}`
    : ''
}
|
||||
|
||||
// Right-click context menu (copy text / JSON) to help debug SNS parsing issues.
|
||||
const contextMenu = ref({ visible: false, x: 0, y: 0, post: null })
|
||||
|
||||
@@ -954,4 +1018,13 @@ onUnmounted(() => {
|
||||
document.removeEventListener('click', onGlobalClick)
|
||||
document.removeEventListener('keydown', onGlobalKeyDown)
|
||||
})
|
||||
|
||||
// Route an external image URL through the backend image proxy.
// Computing `enc` is currently impractical, so the cover image (thumbnail)
// is fetched via the proxy instead. Returns '' when `url` is empty or blank.
const getProxyExternalUrl = (url) => {
  const target = String(url || '').trim()
  return target
    ? `${mediaBase}/api/chat/media/proxy_image?url=${encodeURIComponent(target)}`
    : ''
}
|
||||
|
||||
|
||||
</script>
|
||||
|
||||
@@ -4,6 +4,8 @@ from pathlib import Path
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import httpx
|
||||
import html # 修复&转义的问题!!!
|
||||
import sqlite3
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
@@ -93,6 +95,10 @@ def _parse_timeline_xml(xml_text: str, fallback_username: str) -> dict[str, Any]
|
||||
"media": [],
|
||||
"likes": [],
|
||||
"comments": [],
|
||||
"type": 1, # 默认类型
|
||||
"title": "",
|
||||
"contentUrl": "",
|
||||
"finderFeed": {}
|
||||
}
|
||||
|
||||
xml_str = str(xml_text or "").strip()
|
||||
@@ -113,54 +119,72 @@ def _parse_timeline_xml(xml_text: str, fallback_username: str) -> dict[str, Any]
|
||||
if isinstance(v, str) and v.strip():
|
||||
return v.strip()
|
||||
return ""
|
||||
# &转义!!
|
||||
def _clean_url(u: str) -> str:
|
||||
if not u:
|
||||
return ""
|
||||
|
||||
out["username"] = (
|
||||
_find_text(".//TimelineObject/username", ".//TimelineObject/user_name", ".//TimelineObject/userName", ".//username")
|
||||
or fallback_username
|
||||
)
|
||||
cleaned = html.unescape(u)
|
||||
cleaned = cleaned.replace("&", "&")
|
||||
return cleaned.strip()
|
||||
|
||||
out["username"] = _find_text(".//TimelineObject/username", ".//TimelineObject/user_name",
|
||||
".//username") or fallback_username
|
||||
out["createTime"] = _safe_int(_find_text(".//TimelineObject/createTime", ".//createTime"))
|
||||
out["contentDesc"] = _find_text(".//TimelineObject/contentDesc", ".//contentDesc")
|
||||
out["location"] = _build_location_text(root.find(".//location"))
|
||||
|
||||
# --- 提取内容类型 ---
|
||||
post_type = _safe_int(_find_text(".//ContentObject/type", ".//type"))
|
||||
out["type"] = post_type
|
||||
|
||||
# --- 如果是公众号文章 (Type 3) ---
|
||||
if post_type == 3:
|
||||
out["title"] = _find_text(".//ContentObject/title")
|
||||
out["contentUrl"] = _clean_url(_find_text(".//ContentObject/contentUrl"))
|
||||
|
||||
# --- 如果是视频号 (Type 28) ---
|
||||
if post_type == 28:
|
||||
out["title"] = _find_text(".//ContentObject/title")
|
||||
out["contentUrl"] = _clean_url(_find_text(".//ContentObject/contentUrl"))
|
||||
out["finderFeed"] = {
|
||||
"nickname": _find_text(".//finderFeed/nickname"),
|
||||
"desc": _find_text(".//finderFeed/desc"),
|
||||
"thumbUrl": _clean_url(
|
||||
_find_text(".//finderFeed/mediaList/media/thumbUrl", ".//finderFeed/mediaList/media/coverUrl")),
|
||||
"url": _clean_url(_find_text(".//finderFeed/mediaList/media/url"))
|
||||
}
|
||||
|
||||
media: list[dict[str, Any]] = []
|
||||
try:
|
||||
for m in root.findall(".//mediaList//media"):
|
||||
mt = _safe_int(m.findtext("type"))
|
||||
url_el = m.find("url") if m.find("url") is not None else m.find("urlV")
|
||||
thumb_el = m.find("thumb") if m.find("thumb") is not None else m.find("thumbV")
|
||||
|
||||
# WeChat stores important download/auth hints in attributes (key/enc_idx/token/md5...).
|
||||
# NOTE: xml.etree.ElementTree.Element is falsy when it has no children.
|
||||
# So we must check `is None` instead of using `or`, otherwise `<url>` would be treated as missing.
|
||||
url_el = m.find("url")
|
||||
if url_el is None:
|
||||
url_el = m.find("urlV")
|
||||
thumb_el = m.find("thumb")
|
||||
if thumb_el is None:
|
||||
thumb_el = m.find("thumbV")
|
||||
|
||||
url = str((url_el.text if url_el is not None else "") or "").strip()
|
||||
thumb = str((thumb_el.text if thumb_el is not None else "") or "").strip()
|
||||
url = _clean_url(url_el.text if url_el is not None else "")
|
||||
thumb = _clean_url(thumb_el.text if thumb_el is not None else "")
|
||||
|
||||
url_attrs = dict(url_el.attrib) if url_el is not None and url_el.attrib else {}
|
||||
thumb_attrs = dict(thumb_el.attrib) if thumb_el is not None and thumb_el.attrib else {}
|
||||
|
||||
media_id = str(m.findtext("id") or "").strip()
|
||||
size_el = m.find("size")
|
||||
size = dict(size_el.attrib) if size_el is not None and size_el.attrib else {}
|
||||
|
||||
if not url and not thumb:
|
||||
continue
|
||||
media.append(
|
||||
{
|
||||
"type": mt,
|
||||
"id": media_id,
|
||||
"url": url,
|
||||
"thumb": thumb,
|
||||
"urlAttrs": url_attrs,
|
||||
"thumbAttrs": thumb_attrs,
|
||||
"size": size,
|
||||
}
|
||||
)
|
||||
|
||||
media.append({
|
||||
"type": mt,
|
||||
"id": media_id,
|
||||
"url": url,
|
||||
"thumb": thumb,
|
||||
"urlAttrs": url_attrs,
|
||||
"thumbAttrs": thumb_attrs,
|
||||
"size": size,
|
||||
})
|
||||
except Exception:
|
||||
media = []
|
||||
pass
|
||||
out["media"] = media
|
||||
|
||||
likes: list[str] = []
|
||||
@@ -789,6 +813,11 @@ def list_sns_timeline(
|
||||
|
||||
# Enrich with parsed XML when available.
|
||||
location = str(r.get("location") or "")
|
||||
|
||||
post_type = 1
|
||||
title = ""
|
||||
content_url = ""
|
||||
finder_feed = {}
|
||||
try:
|
||||
tid_u = int(r.get("id") or 0)
|
||||
tid_s = (tid_u & 0xFFFFFFFFFFFFFFFF)
|
||||
@@ -799,6 +828,12 @@ def list_sns_timeline(
|
||||
parsed = _parse_timeline_xml(xml, uname)
|
||||
if parsed.get("location"):
|
||||
location = str(parsed.get("location") or "")
|
||||
|
||||
post_type = parsed.get("type", 1)
|
||||
title = parsed.get("title", "")
|
||||
content_url = parsed.get("contentUrl", "")
|
||||
finder_feed = parsed.get("finderFeed", {})
|
||||
|
||||
pmedia = parsed.get("media") or []
|
||||
if isinstance(pmedia, list) and isinstance(media, list) and pmedia:
|
||||
# Merge by index (best-effort).
|
||||
@@ -835,6 +870,10 @@ def list_sns_timeline(
|
||||
"media": media,
|
||||
"likes": likes,
|
||||
"comments": comments,
|
||||
"type": post_type,
|
||||
"title": title,
|
||||
"contentUrl": content_url,
|
||||
"finderFeed": finder_feed,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -911,6 +950,10 @@ def list_sns_timeline(
|
||||
"media": parsed.get("media") or [],
|
||||
"likes": parsed.get("likes") or [],
|
||||
"comments": parsed.get("comments") or [],
|
||||
"type": parsed.get("type", 1),
|
||||
"title": parsed.get("title", ""),
|
||||
"contentUrl": parsed.get("contentUrl", ""),
|
||||
"finderFeed": parsed.get("finderFeed", {}),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -987,6 +1030,7 @@ async def get_sns_media(
|
||||
)
|
||||
|
||||
if exact_match_path:
|
||||
print(f"=====exact_match_path======={exact_match_path}=============")
|
||||
try:
|
||||
payload, mtype = _read_and_maybe_decrypt_media(Path(exact_match_path), account_dir)
|
||||
if payload and str(mtype or "").startswith("image/"):
|
||||
@@ -997,6 +1041,8 @@ async def get_sns_media(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print("no exact match path")
|
||||
|
||||
# 0) User-picked cache key override (stable across candidate ordering).
|
||||
pick_key = _normalize_hex32(pick)
|
||||
if pick_key:
|
||||
@@ -1105,3 +1151,37 @@ async def get_sns_media(
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=502, detail=f"Fetch sns media failed: {e}")
|
||||
|
||||
|
||||
@router.get("/api/sns/article_thumb", summary="提取公众号文章封面图")
async def proxy_article_thumb(url: str):
    """Fetch an article page and proxy the first ``mmbiz_*`` image URL found in it.

    Args:
        url: Absolute http(s) URL of the article page.

    Returns:
        A ``Response`` carrying the image bytes, with the upstream
        ``Content-Type`` (falling back to ``image/jpeg``).

    Raises:
        HTTPException: 400 when ``url`` is not an http(s) URL; 404 when the
            page contains no matching image URL or any fetch fails.
    """
    u = str(url or "").strip()
    # Require a real http(s) scheme; a bare "http" prefix check would also
    # admit things like "httpfoo://".
    if not u.lower().startswith(("http://", "https://")):
        raise HTTPException(status_code=400, detail="Invalid URL")

    # NOTE(review): this fetches a caller-supplied URL server-side. Confirm the
    # endpoint is only reachable by trusted local clients, or add a host allowlist.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(u, headers=headers)
            resp.raise_for_status()
            html_text = resp.text

            # First quoted absolute URL whose text contains "mmbiz_<letters>".
            match = re.search(r'["\'](https?://[^"\']*?mmbiz_[a-zA-Z]+[^"\']*?)["\']', html_text)
            if not match:
                raise HTTPException(status_code=404, detail="未在 HTML 中找到图片 URL")

            # Decode entities, then collapse any "&amp;" left by double escaping.
            img_url = html.unescape(match.group(1)).replace("&amp;", "&")

            img_resp = await client.get(img_url, headers=headers)
            img_resp.raise_for_status()

            return Response(
                content=img_resp.content,
                media_type=img_resp.headers.get("Content-Type", "image/jpeg"),
            )
    except HTTPException:
        # Keep the specific status/detail raised above instead of masking it
        # as a generic fetch failure.
        raise
    except Exception as e:
        logger.warning(f"[sns] 提取公众号封面失败 url={u[:50]}... : {e}")
        raise HTTPException(status_code=404, detail="无法获取文章封面") from e
|
||||
Reference in New Issue
Block a user