From e76c5d7dfb23cd5063d6cd55072b3cdf43c13f13 Mon Sep 17 00:00:00 2001 From: 2977094657 <2977094657@qq.com> Date: Thu, 21 May 2026 01:11:52 +0800 Subject: [PATCH] =?UTF-8?q?fix(sns-media):=20=E6=81=A2=E5=A4=8D=E6=9C=8B?= =?UTF-8?q?=E5=8F=8B=E5=9C=88=E5=9B=BE=E7=89=87=E6=9C=AC=E5=9C=B0=E7=BC=93?= =?UTF-8?q?=E5=AD=98=E4=BC=98=E5=85=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - /api/sns/media 先解析微信本地 cache/*/Sns/Img 与封面缓存,本地未命中后才走 WeFlow 远程下载解密 - 前端朋友圈图片请求重新携带 post_id、media_id、create_time、尺寸、md5 等本地匹配参数 - 朋友圈离线导出同步改为本地缓存优先,避免导出时优先触发网络请求 --- frontend/pages/sns.vue | 65 ++- src/wechat_decrypt_tool/routers/sns.py | 391 +++++++++++++++++- src/wechat_decrypt_tool/sns_export_service.py | 36 +- 3 files changed, 466 insertions(+), 26 deletions(-) diff --git a/frontend/pages/sns.vue b/frontend/pages/sns.vue index e8daf15..c35a1de 100644 --- a/frontend/pages/sns.vue +++ b/frontend/pages/sns.vue @@ -1964,6 +1964,33 @@ const upgradeTencentHttps = (u) => { return raw } +const normalizeHex32 = (value) => { + const raw = String(value ?? '').trim() + if (!raw) return '' + const hex = raw.replace(/[^0-9a-fA-F]/g, '').toLowerCase() + return hex.length >= 32 ? hex.slice(0, 32) : '' +} + +const mediaSizeKey = (m) => { + const t = String(m?.type ?? '') + const w = String(m?.size?.width || m?.size?.w || '').trim() + const h = String(m?.size?.height || m?.size?.h || '').trim() + const total = String(m?.size?.totalSize || m?.size?.total_size || m?.size?.total || '').trim() + return `${t}:${w}:${h}:${total}` +} + +const mediaSizeGroupIndex = (post, m, idx) => { + const list = Array.isArray(post?.media) ? post.media : [] + const key = mediaSizeKey(m) + const i0 = Number(idx) || 0 + if (!key || i0 <= 0) return i0 + let count = 0 + for (let i = 0; i < i0; i++) { + if (mediaSizeKey(list[i]) === key) count++ + } + return count +} + const getSnsMediaUrl = (post, m, idx, rawUrl) => { const raw = upgradeTencentHttps(String(rawUrl || '').trim()) if (!raw) return '' @@ -1980,12 +2007,37 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => { const host = new URL(raw).hostname.toLowerCase() if (host.endsWith('.qpic.cn') || host.endsWith('.qlogo.cn') || host.endsWith('.tc.qq.com')) { const acc = String(selectedAccount.value || '').trim() - // Match WeFlow's image pipeline: use a stable URL + key/token and let the - // backend handle cache-first remote fetch/decrypt. Avoid attaching legacy - // local-match metadata to the main image path so browser caching can reuse - // the same request URL for list + preview. + const ct = String(post?.createTime || '').trim() + const w = String(m?.size?.width || m?.size?.w || '').trim() + const h = String(m?.size?.height || m?.size?.h || '').trim() + const ts = String(m?.size?.totalSize || m?.size?.total_size || m?.size?.total || '').trim() + const sizeIdx = mediaSizeGroupIndex(post, m, idx) + let md5 = normalizeHex32(m?.urlAttrs?.md5 || m?.thumbAttrs?.md5 || m?.urlAttrs?.MD5 || m?.thumbAttrs?.MD5) + if (!md5) { + const match = /[?&]md5=([0-9a-fA-F]{16,32})/.exec(raw) + if (match?.[1]) md5 = normalizeHex32(match[1]) + } + const parts = new URLSearchParams() if (acc) parts.set('account', acc) + if (ct) parts.set('create_time', ct) + if (w) parts.set('width', w) + if (h) parts.set('height', h) + if (/^\d+$/.test(ts)) parts.set('total_size', ts) + parts.set('idx', String(Number(sizeIdx) || 0)) + + const pid = String(post?.id || post?.tid || '').trim() + if (pid) parts.set('post_id', pid) + + const mid = String(m?.id || '').trim() + if (mid) parts.set('media_id', mid) + + const postType = String(post?.type || '1').trim() + if (postType) parts.set('post_type', postType) + + const mediaType = String(m?.type || '2').trim() + if (mediaType) parts.set('media_type', mediaType) + const token = String(m?.token || m?.urlAttrs?.token || m?.thumbAttrs?.token || '').trim() if (token) parts.set('token', token) @@ -1995,8 +2047,9 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => { parts.set('use_cache', snsUseCache.value ? '1' : '0') // When cache is disabled, bust browser caching so backend really downloads+decrypts each time. if (!snsUseCache.value) parts.set('_t', String(Date.now())) - // Bump this when changing the WeFlow-aligned image pipeline to avoid stale browser caches. - parts.set('v', '10') + if (md5) parts.set('md5', md5) + // 修改后端媒体匹配逻辑时递增版本号,避免浏览器复用旧的错误缓存。 + parts.set('v', '11') parts.set('url', raw) return `${apiBase}/sns/media?${parts.toString()}` } diff --git a/src/wechat_decrypt_tool/routers/sns.py b/src/wechat_decrypt_tool/routers/sns.py index 8a4bb85..fd79bf0 100644 --- a/src/wechat_decrypt_tool/routers/sns.py +++ b/src/wechat_decrypt_tool/routers/sns.py @@ -1,3 +1,4 @@ +from bisect import bisect_left, bisect_right from functools import lru_cache from pathlib import Path import os @@ -877,6 +878,305 @@ def _sns_video_roots(wxid_dir_str: str) -> tuple[str, ...]: roots.sort() return tuple(roots) + +def _image_size_from_bytes(data: bytes, media_type: str) -> tuple[int, int]: + mt = str(media_type or "").lower() + if mt == "image/png": + if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): + try: + w = int.from_bytes(data[16:20], "big") + h = int.from_bytes(data[20:24], "big") + return w, h + except Exception: + return 0, 0 + return 0, 0 + + if mt in {"image/jpeg", "image/jpg"}: + if len(data) < 4 or data[0:2] != b"\xff\xd8": + return 0, 0 + i = 2 + n = len(data) + while i + 9 < n: + if data[i] != 0xFF: + i += 1 + continue + marker = data[i + 1] + i += 2 + while marker == 0xFF and i < n: + marker = data[i] + i += 1 + if marker in {0xD8, 0xD9}: + continue + if i + 2 > n: + return 0, 0 + seg_len = (data[i] << 8) + data[i + 1] + i += 2 + if seg_len < 2 or i + seg_len - 2 > n: + return 0, 0 + if marker in { + 0xC0, + 0xC1, + 0xC2, + 0xC3, + 0xC5, + 0xC6, + 0xC7, + 0xC9, + 0xCA, + 0xCB, + 0xCD, + 0xCE, + 0xCF, + }: + if i + 4 < len(data): + try: + h = (data[i + 1] << 8) + data[i + 2] + w = (data[i + 3] << 8) + data[i + 4] + return w, h + except Exception: + return 0, 0 + i += seg_len - 2 + return 0, 0 + return 0, 0 + + +@lru_cache(maxsize=16) +def _sns_img_roots(wxid_dir_str: str) -> tuple[str, ...]: + """列出包含 `Sns/Img` 的月份缓存目录。""" + wxid_dir = Path(str(wxid_dir_str or "").strip()) + cache_root = wxid_dir / "cache" + try: + month_dirs = [p for p in cache_root.iterdir() if p.is_dir()] + except Exception: + month_dirs = [] + + roots: list[str] = [] + for mdir in month_dirs: + img_root = mdir / "Sns" / "Img" + try: + if img_root.exists() and img_root.is_dir(): + roots.append(str(img_root)) + except Exception: + continue + roots.sort() + return tuple(roots) + + +@lru_cache(maxsize=16) +def _sns_img_time_index(wxid_dir_str: str) -> tuple[list[float], list[str]]: + """为朋友圈本地图片缓存构建按修改时间排序的索引。""" + wxid_dir = Path(str(wxid_dir_str or "").strip()) + out: list[tuple[float, str]] = [] + + cache_root = wxid_dir / "cache" + try: + month_dirs = [p for p in cache_root.iterdir() if p.is_dir()] + except Exception: + month_dirs = [] + + for mdir in month_dirs: + img_root = mdir / "Sns" / "Img" + try: + if not (img_root.exists() and img_root.is_dir()): + continue + except Exception: + continue + try: + for sub in img_root.iterdir(): + if not sub.is_dir(): + continue + for f in sub.iterdir(): + try: + if not f.is_file(): + continue + st = f.stat() + out.append((float(st.st_mtime), str(f))) + except Exception: + continue + except Exception: + continue + + out.sort(key=lambda x: x[0]) + mtimes = [m for m, _p in out] + paths = [_p for _m, _p in out] + return mtimes, paths + + +def _normalize_hex32(value: Optional[str]) -> str: + """提取前 32 位十六进制字符,不存在则返回空字符串。""" + s = str(value or "").strip().lower() + if not s: + return "" + s = re.sub(r"[^0-9a-f]", "", s) + if len(s) < 32: + return "" + return s[:32] + + +def _sns_cache_key_from_path(p: Path) -> str: + """从 `cache/.../Sns/Img/<2hex>/<30hex>` 路径还原 32 位缓存 key。""" + try: + key = f"{p.parent.name}{p.name}" + except Exception: + return "" + return _normalize_hex32(key) + + +def _generate_sns_cache_key(tid: str, media_id: str, media_type: int = 2) -> str: + if not tid or not media_id: + return "" + raw_key = f"{tid}_{media_id}_{media_type}" + try: + return hashlib.md5(raw_key.encode("utf-8")).hexdigest() + except Exception: + return "" + + +def _resolve_sns_cached_image_path_by_cache_key( + *, + wxid_dir: Path, + cache_key: str, + create_time: int, +) -> Optional[str]: + key32 = _normalize_hex32(cache_key) + if not key32: + return None + + sub = key32[:2] + rest = key32[2:] + roots = _sns_img_roots(str(wxid_dir)) + if not roots: + return None + + best: tuple[float, str] | None = None + for root_str in roots: + try: + p = Path(root_str) / sub / rest + if not (p.exists() and p.is_file()): + continue + st = p.stat() + score = abs(float(st.st_mtime) - float(create_time)) if create_time > 0 else -float(st.st_mtime) + if best is None or score < best[0]: + best = (score, str(p)) + except Exception: + continue + return best[1] if best else None + + +def _resolve_sns_cached_image_path_by_md5( + *, + wxid_dir: Path, + md5: str, + create_time: int, +) -> Optional[str]: + md5_32 = _normalize_hex32(md5) + if not md5_32: + return None + + sub = md5_32[:2] + rest = md5_32[2:] + roots = _sns_img_roots(str(wxid_dir)) + if not roots: + return None + + best: tuple[float, str] | None = None + for root_str in roots: + try: + p = Path(root_str) / sub / rest + if not (p.exists() and p.is_file()): + continue + st = p.stat() + score = abs(float(st.st_mtime) - float(create_time)) if create_time > 0 else -float(st.st_mtime) + if best is None or score < best[0]: + best = (score, str(p)) + except Exception: + continue + return best[1] if best else None + + +@lru_cache(maxsize=4096) +def _resolve_sns_cached_image_path( + *, + account_dir_str: str, + create_time: int, + width: int, + height: int, + idx: int, + total_size: int = 0, +) -> Optional[str]: + """根据朋友圈动态和媒体元数据尽力匹配本地图片缓存。""" + total_size_i = int(total_size or 0) + must_match_size = width > 0 and height > 0 + if (not must_match_size) and total_size_i <= 0: + return None + + account_dir = Path(str(account_dir_str or "").strip()) + if not account_dir.exists(): + return None + + wxid_dir = _resolve_account_wxid_dir(account_dir) + if not wxid_dir: + return None + + mtimes, paths = _sns_img_time_index(str(wxid_dir)) + if not mtimes: + return None + + create_time_i = int(create_time or 0) + if create_time_i > 0: + window = 72 * 3600 + lo = create_time_i - window + hi = create_time_i + window + l = bisect_left(mtimes, lo) + r = bisect_right(mtimes, hi) + if l >= r: + l = max(0, len(mtimes) - 800) + r = len(mtimes) + else: + l = max(0, len(mtimes) - 800) + r = len(mtimes) + + candidates: list[tuple[float, str]] = [] + for j in range(l, r): + try: + if create_time_i > 0: + candidates.append((abs(mtimes[j] - float(create_time_i)), paths[j])) + else: + candidates.append((-mtimes[j], paths[j])) + except Exception: + continue + candidates.sort(key=lambda x: x[0]) + + matched: list[tuple[int, float, str]] = [] + for diff, pstr in candidates[:2000]: + try: + p = Path(pstr) + payload, media_type = _read_and_maybe_decrypt_media(p, account_dir) + if not payload or not str(media_type or "").startswith("image/"): + continue + if must_match_size: + w0, h0 = _image_size_from_bytes(payload, str(media_type or "")) + if (w0, h0) != (width, height): + continue + size_diff = abs(len(payload) - total_size_i) if total_size_i > 0 else 0 + matched.append((int(size_diff), float(diff), pstr)) + except Exception: + continue + + if not matched: + return None + if must_match_size: + matched.sort(key=lambda x: (x[0], x[1], x[2])) + if total_size_i > 0: + return matched[0][2] + idx0 = max(0, int(idx or 0)) + return matched[idx0][2] if idx0 < len(matched) else None + if total_size_i > 0: + matched.sort(key=lambda x: (x[0], x[1], x[2])) + return matched[0][2] + return None + + def _resolve_sns_cached_video_path( wxid_dir: Path, post_id: str, @@ -2386,22 +2686,109 @@ async def _try_fetch_and_decrypt_sns_remote( return resp -@router.get("/api/sns/media", summary="获取朋友圈图片(下载解密优先)") +@router.get("/api/sns/media", summary="获取朋友圈图片(本地缓存优先)") async def get_sns_media( account: Optional[str] = None, + create_time: int = 0, + width: int = 0, + height: int = 0, + total_size: int = 0, + idx: int = 0, + post_id: Optional[str] = None, + media_id: Optional[str] = None, + post_type: int = 1, + media_type: int = 2, + md5: Optional[str] = None, token: Optional[str] = None, key: Optional[str] = None, use_cache: int = 1, url: Optional[str] = None, ): account_dir = _resolve_account_dir(account) + wxid_dir = _resolve_account_wxid_dir(account_dir) try: use_cache_flag = bool(int(use_cache or 1)) except Exception: use_cache_flag = True - # 0) Prefer WeFlow-style remote download + decrypt (accurate, avoids local cache mismatch). + if use_cache_flag: + if wxid_dir and post_id and media_id and int(post_type or 1) == 7: + try: + raw_key = f"{post_id}_{media_id}_4" + bkg_md5 = hashlib.md5(raw_key.encode("utf-8", errors="ignore")).hexdigest() + bkg_path = wxid_dir / "business" / "sns" / "bkg" / bkg_md5[:2] / bkg_md5 + if bkg_path.exists() and bkg_path.is_file(): + return FileResponse( + str(bkg_path), + media_type="image/jpeg", + headers={"Cache-Control": "public, max-age=31536000", "X-SNS-Source": "local-cover-cache"}, + ) + except Exception: + pass + + local_path = "" + + # 1) 精确路径匹配:md5(tid_mediaId_type)。 + if wxid_dir and post_id and media_id: + try: + key_post = _generate_sns_cache_key(str(post_id), str(media_id), int(post_type or 1)) + local_path = _resolve_sns_cached_image_path_by_cache_key( + wxid_dir=wxid_dir, + cache_key=key_post, + create_time=0, + ) or "" + except Exception: + local_path = "" + + if (not local_path) and int(post_type or 1) != int(media_type or 2): + try: + key_media = _generate_sns_cache_key(str(post_id), str(media_id), int(media_type or 2)) + local_path = _resolve_sns_cached_image_path_by_cache_key( + wxid_dir=wxid_dir, + cache_key=key_media, + create_time=0, + ) or "" + except Exception: + local_path = "" + + # 2) 使用 XML 或 URL 里携带的 md5 匹配缓存布局。 + if (not local_path) and wxid_dir and _normalize_hex32(md5): + try: + local_path = _resolve_sns_cached_image_path_by_md5( + wxid_dir=wxid_dir, + md5=str(md5 or ""), + create_time=int(create_time or 0), + ) or "" + except Exception: + local_path = "" + + # 3) 旧版启发式匹配:发布时间、尺寸、文件大小和同尺寸组内序号。 + if not local_path: + try: + local_path = _resolve_sns_cached_image_path( + account_dir_str=str(account_dir), + create_time=int(create_time or 0), + width=int(width or 0), + height=int(height or 0), + idx=max(0, int(idx or 0)), + total_size=int(total_size or 0), + ) or "" + except Exception: + local_path = "" + + if local_path: + try: + payload, local_media_type = _read_and_maybe_decrypt_media(Path(local_path), account_dir) + if payload and str(local_media_type or "").startswith("image/"): + resp = Response(content=payload, media_type=str(local_media_type or "image/jpeg")) + resp.headers["Cache-Control"] = "public, max-age=31536000" + resp.headers["X-SNS-Source"] = "local-cache" + return resp + except Exception: + pass + + # 4) 最后再走远程:WeFlow 风格下载、解密和远程缓存。 remote_resp = await _try_fetch_and_decrypt_sns_remote( account_dir=account_dir, url=str(url or ""), diff --git a/src/wechat_decrypt_tool/sns_export_service.py b/src/wechat_decrypt_tool/sns_export_service.py index 44b5997..2f6f05a 100644 --- a/src/wechat_decrypt_tool/sns_export_service.py +++ b/src/wechat_decrypt_tool/sns_export_service.py @@ -982,24 +982,8 @@ class SnsExportManager: payload = b"" mt = "" - # 0) Prefer WeFlow-style remote download+decrypt (accurate when keys are present). - if fixed: - should_cancel() - res = run_async( - _try_fetch_and_decrypt_sns_image_remote( - account_dir=account_dir, - url=fixed, - key=str(key or ""), - token=str(token or ""), - use_cache=use_cache, - ) - ) - if res is not None: - payload = bytes(res.payload or b"") - mt = str(res.media_type or "") - - # 1) Local cache fallback (only when cache is enabled; mirrors `/api/sns/media` semantics). - if (not payload) and use_cache: + # 0) 优先本地缓存;旧朋友圈的 CDN 资源可能已不可用或已降级。 + if use_cache: try: post_type = int(post.get("type") or 1) except Exception: @@ -1086,6 +1070,22 @@ class SnsExportManager: payload = b"" mt = "" + # 1) 本地未命中后,再走远程下载和解密。 + if (not payload) and fixed: + should_cancel() + res = run_async( + _try_fetch_and_decrypt_sns_image_remote( + account_dir=account_dir, + url=fixed, + key=str(key or ""), + token=str(token or ""), + use_cache=use_cache, + ) + ) + if res is not None: + payload = bytes(res.payload or b"") + mt = str(res.media_type or "") + # 2) Last resort: proxy the raw URL (may return a Tencent placeholder image). if (not payload) and str(raw_url or "").startswith("http"): try: