fix(sns-media): 恢复朋友圈图片本地缓存优先

- /api/sns/media 先解析微信本地 cache/*/Sns/Img 与封面缓存,本地未命中后才走 WeFlow 远程下载解密

- 前端朋友圈图片请求重新携带 post_id、media_id、create_time、尺寸、md5 等本地匹配参数

- 朋友圈离线导出同步改为本地缓存优先,避免导出时优先触发网络请求
This commit is contained in:
2977094657
2026-05-21 01:11:52 +08:00
Unverified
parent 9089e7cfc4
commit e76c5d7dfb
3 changed files with 466 additions and 26 deletions
+59 -6
View File
@@ -1964,6 +1964,33 @@ const upgradeTencentHttps = (u) => {
return raw return raw
} }
const normalizeHex32 = (value) => {
const raw = String(value ?? '').trim()
if (!raw) return ''
const hex = raw.replace(/[^0-9a-fA-F]/g, '').toLowerCase()
return hex.length >= 32 ? hex.slice(0, 32) : ''
}
const mediaSizeKey = (m) => {
const t = String(m?.type ?? '')
const w = String(m?.size?.width || m?.size?.w || '').trim()
const h = String(m?.size?.height || m?.size?.h || '').trim()
const total = String(m?.size?.totalSize || m?.size?.total_size || m?.size?.total || '').trim()
return `${t}:${w}:${h}:${total}`
}
const mediaSizeGroupIndex = (post, m, idx) => {
const list = Array.isArray(post?.media) ? post.media : []
const key = mediaSizeKey(m)
const i0 = Number(idx) || 0
if (!key || i0 <= 0) return i0
let count = 0
for (let i = 0; i < i0; i++) {
if (mediaSizeKey(list[i]) === key) count++
}
return count
}
const getSnsMediaUrl = (post, m, idx, rawUrl) => { const getSnsMediaUrl = (post, m, idx, rawUrl) => {
const raw = upgradeTencentHttps(String(rawUrl || '').trim()) const raw = upgradeTencentHttps(String(rawUrl || '').trim())
if (!raw) return '' if (!raw) return ''
@@ -1980,12 +2007,37 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => {
const host = new URL(raw).hostname.toLowerCase() const host = new URL(raw).hostname.toLowerCase()
if (host.endsWith('.qpic.cn') || host.endsWith('.qlogo.cn') || host.endsWith('.tc.qq.com')) { if (host.endsWith('.qpic.cn') || host.endsWith('.qlogo.cn') || host.endsWith('.tc.qq.com')) {
const acc = String(selectedAccount.value || '').trim() const acc = String(selectedAccount.value || '').trim()
// Match WeFlow's image pipeline: use a stable URL + key/token and let the const ct = String(post?.createTime || '').trim()
// backend handle cache-first remote fetch/decrypt. Avoid attaching legacy const w = String(m?.size?.width || m?.size?.w || '').trim()
// local-match metadata to the main image path so browser caching can reuse const h = String(m?.size?.height || m?.size?.h || '').trim()
// the same request URL for list + preview. const ts = String(m?.size?.totalSize || m?.size?.total_size || m?.size?.total || '').trim()
const sizeIdx = mediaSizeGroupIndex(post, m, idx)
let md5 = normalizeHex32(m?.urlAttrs?.md5 || m?.thumbAttrs?.md5 || m?.urlAttrs?.MD5 || m?.thumbAttrs?.MD5)
if (!md5) {
const match = /[?&]md5=([0-9a-fA-F]{16,32})/.exec(raw)
if (match?.[1]) md5 = normalizeHex32(match[1])
}
const parts = new URLSearchParams() const parts = new URLSearchParams()
if (acc) parts.set('account', acc) if (acc) parts.set('account', acc)
if (ct) parts.set('create_time', ct)
if (w) parts.set('width', w)
if (h) parts.set('height', h)
if (/^\d+$/.test(ts)) parts.set('total_size', ts)
parts.set('idx', String(Number(sizeIdx) || 0))
const pid = String(post?.id || post?.tid || '').trim()
if (pid) parts.set('post_id', pid)
const mid = String(m?.id || '').trim()
if (mid) parts.set('media_id', mid)
const postType = String(post?.type || '1').trim()
if (postType) parts.set('post_type', postType)
const mediaType = String(m?.type || '2').trim()
if (mediaType) parts.set('media_type', mediaType)
const token = String(m?.token || m?.urlAttrs?.token || m?.thumbAttrs?.token || '').trim() const token = String(m?.token || m?.urlAttrs?.token || m?.thumbAttrs?.token || '').trim()
if (token) parts.set('token', token) if (token) parts.set('token', token)
@@ -1995,8 +2047,9 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => {
parts.set('use_cache', snsUseCache.value ? '1' : '0') parts.set('use_cache', snsUseCache.value ? '1' : '0')
// When cache is disabled, bust browser caching so backend really downloads+decrypts each time. // When cache is disabled, bust browser caching so backend really downloads+decrypts each time.
if (!snsUseCache.value) parts.set('_t', String(Date.now())) if (!snsUseCache.value) parts.set('_t', String(Date.now()))
// Bump this when changing the WeFlow-aligned image pipeline to avoid stale browser caches. if (md5) parts.set('md5', md5)
parts.set('v', '10') // 修改后端媒体匹配逻辑时递增版本号,避免浏览器复用旧的错误缓存。
parts.set('v', '11')
parts.set('url', raw) parts.set('url', raw)
return `${apiBase}/sns/media?${parts.toString()}` return `${apiBase}/sns/media?${parts.toString()}`
} }
+389 -2
View File
@@ -1,3 +1,4 @@
from bisect import bisect_left, bisect_right
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
import os import os
@@ -877,6 +878,305 @@ def _sns_video_roots(wxid_dir_str: str) -> tuple[str, ...]:
roots.sort() roots.sort()
return tuple(roots) return tuple(roots)
def _image_size_from_bytes(data: bytes, media_type: str) -> tuple[int, int]:
mt = str(media_type or "").lower()
if mt == "image/png":
if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
try:
w = int.from_bytes(data[16:20], "big")
h = int.from_bytes(data[20:24], "big")
return w, h
except Exception:
return 0, 0
return 0, 0
if mt in {"image/jpeg", "image/jpg"}:
if len(data) < 4 or data[0:2] != b"\xff\xd8":
return 0, 0
i = 2
n = len(data)
while i + 9 < n:
if data[i] != 0xFF:
i += 1
continue
marker = data[i + 1]
i += 2
while marker == 0xFF and i < n:
marker = data[i]
i += 1
if marker in {0xD8, 0xD9}:
continue
if i + 2 > n:
return 0, 0
seg_len = (data[i] << 8) + data[i + 1]
i += 2
if seg_len < 2 or i + seg_len - 2 > n:
return 0, 0
if marker in {
0xC0,
0xC1,
0xC2,
0xC3,
0xC5,
0xC6,
0xC7,
0xC9,
0xCA,
0xCB,
0xCD,
0xCE,
0xCF,
}:
if i + 4 < len(data):
try:
h = (data[i + 1] << 8) + data[i + 2]
w = (data[i + 3] << 8) + data[i + 4]
return w, h
except Exception:
return 0, 0
i += seg_len - 2
return 0, 0
return 0, 0
@lru_cache(maxsize=16)
def _sns_img_roots(wxid_dir_str: str) -> tuple[str, ...]:
"""列出包含 `Sns/Img` 的月份缓存目录。"""
wxid_dir = Path(str(wxid_dir_str or "").strip())
cache_root = wxid_dir / "cache"
try:
month_dirs = [p for p in cache_root.iterdir() if p.is_dir()]
except Exception:
month_dirs = []
roots: list[str] = []
for mdir in month_dirs:
img_root = mdir / "Sns" / "Img"
try:
if img_root.exists() and img_root.is_dir():
roots.append(str(img_root))
except Exception:
continue
roots.sort()
return tuple(roots)
@lru_cache(maxsize=16)
def _sns_img_time_index(wxid_dir_str: str) -> tuple[list[float], list[str]]:
"""为朋友圈本地图片缓存构建按修改时间排序的索引。"""
wxid_dir = Path(str(wxid_dir_str or "").strip())
out: list[tuple[float, str]] = []
cache_root = wxid_dir / "cache"
try:
month_dirs = [p for p in cache_root.iterdir() if p.is_dir()]
except Exception:
month_dirs = []
for mdir in month_dirs:
img_root = mdir / "Sns" / "Img"
try:
if not (img_root.exists() and img_root.is_dir()):
continue
except Exception:
continue
try:
for sub in img_root.iterdir():
if not sub.is_dir():
continue
for f in sub.iterdir():
try:
if not f.is_file():
continue
st = f.stat()
out.append((float(st.st_mtime), str(f)))
except Exception:
continue
except Exception:
continue
out.sort(key=lambda x: x[0])
mtimes = [m for m, _p in out]
paths = [_p for _m, _p in out]
return mtimes, paths
def _normalize_hex32(value: Optional[str]) -> str:
"""提取前 32 位十六进制字符,不存在则返回空字符串。"""
s = str(value or "").strip().lower()
if not s:
return ""
s = re.sub(r"[^0-9a-f]", "", s)
if len(s) < 32:
return ""
return s[:32]
def _sns_cache_key_from_path(p: Path) -> str:
"""从 `cache/.../Sns/Img/<2hex>/<30hex>` 路径还原 32 位缓存 key。"""
try:
key = f"{p.parent.name}{p.name}"
except Exception:
return ""
return _normalize_hex32(key)
def _generate_sns_cache_key(tid: str, media_id: str, media_type: int = 2) -> str:
if not tid or not media_id:
return ""
raw_key = f"{tid}_{media_id}_{media_type}"
try:
return hashlib.md5(raw_key.encode("utf-8")).hexdigest()
except Exception:
return ""
def _resolve_sns_cached_image_path_by_cache_key(
*,
wxid_dir: Path,
cache_key: str,
create_time: int,
) -> Optional[str]:
key32 = _normalize_hex32(cache_key)
if not key32:
return None
sub = key32[:2]
rest = key32[2:]
roots = _sns_img_roots(str(wxid_dir))
if not roots:
return None
best: tuple[float, str] | None = None
for root_str in roots:
try:
p = Path(root_str) / sub / rest
if not (p.exists() and p.is_file()):
continue
st = p.stat()
score = abs(float(st.st_mtime) - float(create_time)) if create_time > 0 else -float(st.st_mtime)
if best is None or score < best[0]:
best = (score, str(p))
except Exception:
continue
return best[1] if best else None
def _resolve_sns_cached_image_path_by_md5(
*,
wxid_dir: Path,
md5: str,
create_time: int,
) -> Optional[str]:
md5_32 = _normalize_hex32(md5)
if not md5_32:
return None
sub = md5_32[:2]
rest = md5_32[2:]
roots = _sns_img_roots(str(wxid_dir))
if not roots:
return None
best: tuple[float, str] | None = None
for root_str in roots:
try:
p = Path(root_str) / sub / rest
if not (p.exists() and p.is_file()):
continue
st = p.stat()
score = abs(float(st.st_mtime) - float(create_time)) if create_time > 0 else -float(st.st_mtime)
if best is None or score < best[0]:
best = (score, str(p))
except Exception:
continue
return best[1] if best else None
@lru_cache(maxsize=4096)
def _resolve_sns_cached_image_path(
*,
account_dir_str: str,
create_time: int,
width: int,
height: int,
idx: int,
total_size: int = 0,
) -> Optional[str]:
"""根据朋友圈动态和媒体元数据尽力匹配本地图片缓存。"""
total_size_i = int(total_size or 0)
must_match_size = width > 0 and height > 0
if (not must_match_size) and total_size_i <= 0:
return None
account_dir = Path(str(account_dir_str or "").strip())
if not account_dir.exists():
return None
wxid_dir = _resolve_account_wxid_dir(account_dir)
if not wxid_dir:
return None
mtimes, paths = _sns_img_time_index(str(wxid_dir))
if not mtimes:
return None
create_time_i = int(create_time or 0)
if create_time_i > 0:
window = 72 * 3600
lo = create_time_i - window
hi = create_time_i + window
l = bisect_left(mtimes, lo)
r = bisect_right(mtimes, hi)
if l >= r:
l = max(0, len(mtimes) - 800)
r = len(mtimes)
else:
l = max(0, len(mtimes) - 800)
r = len(mtimes)
candidates: list[tuple[float, str]] = []
for j in range(l, r):
try:
if create_time_i > 0:
candidates.append((abs(mtimes[j] - float(create_time_i)), paths[j]))
else:
candidates.append((-mtimes[j], paths[j]))
except Exception:
continue
candidates.sort(key=lambda x: x[0])
matched: list[tuple[int, float, str]] = []
for diff, pstr in candidates[:2000]:
try:
p = Path(pstr)
payload, media_type = _read_and_maybe_decrypt_media(p, account_dir)
if not payload or not str(media_type or "").startswith("image/"):
continue
if must_match_size:
w0, h0 = _image_size_from_bytes(payload, str(media_type or ""))
if (w0, h0) != (width, height):
continue
size_diff = abs(len(payload) - total_size_i) if total_size_i > 0 else 0
matched.append((int(size_diff), float(diff), pstr))
except Exception:
continue
if not matched:
return None
if must_match_size:
matched.sort(key=lambda x: (x[0], x[1], x[2]))
if total_size_i > 0:
return matched[0][2]
idx0 = max(0, int(idx or 0))
return matched[idx0][2] if idx0 < len(matched) else None
if total_size_i > 0:
matched.sort(key=lambda x: (x[0], x[1], x[2]))
return matched[0][2]
return None
def _resolve_sns_cached_video_path( def _resolve_sns_cached_video_path(
wxid_dir: Path, wxid_dir: Path,
post_id: str, post_id: str,
@@ -2386,22 +2686,109 @@ async def _try_fetch_and_decrypt_sns_remote(
return resp return resp
@router.get("/api/sns/media", summary="获取朋友圈图片(下载解密优先)") @router.get("/api/sns/media", summary="获取朋友圈图片(本地缓存优先)")
async def get_sns_media( async def get_sns_media(
account: Optional[str] = None, account: Optional[str] = None,
create_time: int = 0,
width: int = 0,
height: int = 0,
total_size: int = 0,
idx: int = 0,
post_id: Optional[str] = None,
media_id: Optional[str] = None,
post_type: int = 1,
media_type: int = 2,
md5: Optional[str] = None,
token: Optional[str] = None, token: Optional[str] = None,
key: Optional[str] = None, key: Optional[str] = None,
use_cache: int = 1, use_cache: int = 1,
url: Optional[str] = None, url: Optional[str] = None,
): ):
account_dir = _resolve_account_dir(account) account_dir = _resolve_account_dir(account)
wxid_dir = _resolve_account_wxid_dir(account_dir)
try: try:
use_cache_flag = bool(int(use_cache or 1)) use_cache_flag = bool(int(use_cache or 1))
except Exception: except Exception:
use_cache_flag = True use_cache_flag = True
# 0) Prefer WeFlow-style remote download + decrypt (accurate, avoids local cache mismatch). if use_cache_flag:
if wxid_dir and post_id and media_id and int(post_type or 1) == 7:
try:
raw_key = f"{post_id}_{media_id}_4"
bkg_md5 = hashlib.md5(raw_key.encode("utf-8", errors="ignore")).hexdigest()
bkg_path = wxid_dir / "business" / "sns" / "bkg" / bkg_md5[:2] / bkg_md5
if bkg_path.exists() and bkg_path.is_file():
return FileResponse(
str(bkg_path),
media_type="image/jpeg",
headers={"Cache-Control": "public, max-age=31536000", "X-SNS-Source": "local-cover-cache"},
)
except Exception:
pass
local_path = ""
# 1) 精确路径匹配:md5(tid_mediaId_type)。
if wxid_dir and post_id and media_id:
try:
key_post = _generate_sns_cache_key(str(post_id), str(media_id), int(post_type or 1))
local_path = _resolve_sns_cached_image_path_by_cache_key(
wxid_dir=wxid_dir,
cache_key=key_post,
create_time=0,
) or ""
except Exception:
local_path = ""
if (not local_path) and int(post_type or 1) != int(media_type or 2):
try:
key_media = _generate_sns_cache_key(str(post_id), str(media_id), int(media_type or 2))
local_path = _resolve_sns_cached_image_path_by_cache_key(
wxid_dir=wxid_dir,
cache_key=key_media,
create_time=0,
) or ""
except Exception:
local_path = ""
# 2) 使用 XML 或 URL 里携带的 md5 匹配缓存布局。
if (not local_path) and wxid_dir and _normalize_hex32(md5):
try:
local_path = _resolve_sns_cached_image_path_by_md5(
wxid_dir=wxid_dir,
md5=str(md5 or ""),
create_time=int(create_time or 0),
) or ""
except Exception:
local_path = ""
# 3) 旧版启发式匹配:发布时间、尺寸、文件大小和同尺寸组内序号。
if not local_path:
try:
local_path = _resolve_sns_cached_image_path(
account_dir_str=str(account_dir),
create_time=int(create_time or 0),
width=int(width or 0),
height=int(height or 0),
idx=max(0, int(idx or 0)),
total_size=int(total_size or 0),
) or ""
except Exception:
local_path = ""
if local_path:
try:
payload, local_media_type = _read_and_maybe_decrypt_media(Path(local_path), account_dir)
if payload and str(local_media_type or "").startswith("image/"):
resp = Response(content=payload, media_type=str(local_media_type or "image/jpeg"))
resp.headers["Cache-Control"] = "public, max-age=31536000"
resp.headers["X-SNS-Source"] = "local-cache"
return resp
except Exception:
pass
# 4) 最后再走远程:WeFlow 风格下载、解密和远程缓存。
remote_resp = await _try_fetch_and_decrypt_sns_remote( remote_resp = await _try_fetch_and_decrypt_sns_remote(
account_dir=account_dir, account_dir=account_dir,
url=str(url or ""), url=str(url or ""),
+18 -18
View File
@@ -982,24 +982,8 @@ class SnsExportManager:
payload = b"" payload = b""
mt = "" mt = ""
# 0) Prefer WeFlow-style remote download+decrypt (accurate when keys are present). # 0) 优先本地缓存;旧朋友圈的 CDN 资源可能已不可用或已降级。
if fixed: if use_cache:
should_cancel()
res = run_async(
_try_fetch_and_decrypt_sns_image_remote(
account_dir=account_dir,
url=fixed,
key=str(key or ""),
token=str(token or ""),
use_cache=use_cache,
)
)
if res is not None:
payload = bytes(res.payload or b"")
mt = str(res.media_type or "")
# 1) Local cache fallback (only when cache is enabled; mirrors `/api/sns/media` semantics).
if (not payload) and use_cache:
try: try:
post_type = int(post.get("type") or 1) post_type = int(post.get("type") or 1)
except Exception: except Exception:
@@ -1086,6 +1070,22 @@ class SnsExportManager:
payload = b"" payload = b""
mt = "" mt = ""
# 1) 本地未命中后,再走远程下载和解密。
if (not payload) and fixed:
should_cancel()
res = run_async(
_try_fetch_and_decrypt_sns_image_remote(
account_dir=account_dir,
url=fixed,
key=str(key or ""),
token=str(token or ""),
use_cache=use_cache,
)
)
if res is not None:
payload = bytes(res.payload or b"")
mt = str(res.media_type or "")
# 2) Last resort: proxy the raw URL (may return a Tencent placeholder image). # 2) Last resort: proxy the raw URL (may return a Tencent placeholder image).
if (not payload) and str(raw_url or "").startswith("http"): if (not payload) and str(raw_url or "").startswith("http"):
try: try: