From e76c5d7dfb23cd5063d6cd55072b3cdf43c13f13 Mon Sep 17 00:00:00 2001
From: 2977094657 <2977094657@qq.com>
Date: Thu, 21 May 2026 01:11:52 +0800
Subject: [PATCH] =?UTF-8?q?fix(sns-media):=20=E6=81=A2=E5=A4=8D=E6=9C=8B?=
 =?UTF-8?q?=E5=8F=8B=E5=9C=88=E5=9B=BE=E7=89=87=E6=9C=AC=E5=9C=B0=E7=BC=93?=
 =?UTF-8?q?=E5=AD=98=E4=BC=98=E5=85=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- /api/sns/media 先解析微信本地 cache/*/Sns/Img 与封面缓存，本地未命中后才走 WeFlow 远程下载解密

- 前端朋友圈图片请求重新携带 post_id、media_id、create_time、尺寸、md5 等本地匹配参数

- 朋友圈离线导出同步改为本地缓存优先，避免导出时优先触发网络请求
---
 frontend/pages/sns.vue                        |  65 ++-
 src/wechat_decrypt_tool/routers/sns.py        | 391 +++++++++++++++++-
 src/wechat_decrypt_tool/sns_export_service.py |  36 +-
 3 files changed, 466 insertions(+), 26 deletions(-)

diff --git a/frontend/pages/sns.vue b/frontend/pages/sns.vue
index e8daf15..c35a1de 100644
--- a/frontend/pages/sns.vue
+++ b/frontend/pages/sns.vue
@@ -1964,6 +1964,33 @@ const upgradeTencentHttps = (u) => {
   return raw
 }
 
+const normalizeHex32 = (value) => {
+  // Keep only hex digits; a value with fewer than 32 of them is rejected.
+  const trimmed = String(value ?? '').trim()
+  const digits = trimmed ? trimmed.replace(/[^0-9a-fA-F]/g, '').toLowerCase() : ''
+  return digits.length < 32 ? '' : digits.slice(0, 32)
+}
+
+const mediaSizeKey = (m) => {
+  // Key layout: "<type>:<width>:<height>:<totalSize>"; absent fields stay empty.
+  const size = m?.size
+  const text = (v) => String(v || '').trim()
+  const dims = [size?.width || size?.w, size?.height || size?.h, size?.totalSize || size?.total_size || size?.total]
+  return [String(m?.type ?? ''), ...dims.map(text)].join(':')
+}
+
+const mediaSizeGroupIndex = (post, m, idx) => {
+  // Rank of `m` among the preceding media entries that share its size key.
+  // A zero, negative or non-numeric index skips the scan.
+  const position = Number(idx) || 0
+  const wanted = mediaSizeKey(m)
+  if (!wanted || position <= 0) return position
+  const entries = Array.isArray(post?.media) ? post.media : []
+  let sameBefore = 0
+  for (let i = 0; i < position; i += 1) sameBefore += mediaSizeKey(entries[i]) === wanted ? 1 : 0
+  return sameBefore
+}
+
 const getSnsMediaUrl = (post, m, idx, rawUrl) => {
   const raw = upgradeTencentHttps(String(rawUrl || '').trim())
   if (!raw) return ''
@@ -1980,12 +2007,37 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => {
       const host = new URL(raw).hostname.toLowerCase()
       if (host.endsWith('.qpic.cn') || host.endsWith('.qlogo.cn') || host.endsWith('.tc.qq.com')) {
         const acc = String(selectedAccount.value || '').trim()
-        // Match WeFlow's image pipeline: use a stable URL + key/token and let the
-        // backend handle cache-first remote fetch/decrypt. Avoid attaching legacy
-        // local-match metadata to the main image path so browser caching can reuse
-        // the same request URL for list + preview.
+        const ct = String(post?.createTime || '').trim()
+        const w = String(m?.size?.width || m?.size?.w || '').trim()
+        const h = String(m?.size?.height || m?.size?.h || '').trim()
+        const ts = String(m?.size?.totalSize || m?.size?.total_size || m?.size?.total || '').trim()
+        const sizeIdx = mediaSizeGroupIndex(post, m, idx)
+        let md5 = normalizeHex32(m?.urlAttrs?.md5 || m?.thumbAttrs?.md5 || m?.urlAttrs?.MD5 || m?.thumbAttrs?.MD5)
+        if (!md5) {
+          const match = /[?&]md5=([0-9a-fA-F]{16,32})/.exec(raw)
+          if (match?.[1]) md5 = normalizeHex32(match[1])
+        }
+
         const parts = new URLSearchParams()
         if (acc) parts.set('account', acc)
+        if (ct) parts.set('create_time', ct)
+        if (w) parts.set('width', w)
+        if (h) parts.set('height', h)
+        if (/^\d+$/.test(ts)) parts.set('total_size', ts)
+        parts.set('idx', String(Number(sizeIdx) || 0))
+
+        const pid = String(post?.id || post?.tid || '').trim()
+        if (pid) parts.set('post_id', pid)
+
+        const mid = String(m?.id || '').trim()
+        if (mid) parts.set('media_id', mid)
+
+        const postType = String(post?.type || '1').trim()
+        if (postType) parts.set('post_type', postType)
+
+        const mediaType = String(m?.type || '2').trim()
+        if (mediaType) parts.set('media_type', mediaType)
+
         const token = String(m?.token || m?.urlAttrs?.token || m?.thumbAttrs?.token || '').trim()
         if (token) parts.set('token', token)
 
@@ -1995,8 +2047,9 @@ const getSnsMediaUrl = (post, m, idx, rawUrl) => {
         parts.set('use_cache', snsUseCache.value ? '1' : '0')
         // When cache is disabled, bust browser caching so backend really downloads+decrypts each time.
         if (!snsUseCache.value) parts.set('_t', String(Date.now()))
-        // Bump this when changing the WeFlow-aligned image pipeline to avoid stale browser caches.
-        parts.set('v', '10')
+        if (md5) parts.set('md5', md5)
+        // 修改后端媒体匹配逻辑时递增版本号，避免浏览器复用旧的错误缓存。
+        parts.set('v', '11')
         parts.set('url', raw)
         return `${apiBase}/sns/media?${parts.toString()}`
       }
diff --git a/src/wechat_decrypt_tool/routers/sns.py b/src/wechat_decrypt_tool/routers/sns.py
index 8a4bb85..fd79bf0 100644
--- a/src/wechat_decrypt_tool/routers/sns.py
+++ b/src/wechat_decrypt_tool/routers/sns.py
@@ -1,3 +1,4 @@
+from bisect import bisect_left, bisect_right
 from functools import lru_cache
 from pathlib import Path
 import os
@@ -877,6 +878,305 @@ def _sns_video_roots(wxid_dir_str: str) -> tuple[str, ...]:
     roots.sort()
     return tuple(roots)
 
+
+def _image_size_from_bytes(data: bytes, media_type: str) -> tuple[int, int]:
+    """Read `(width, height)` from a PNG or JPEG header.
+
+    Args:
+        data: Raw image bytes.
+        media_type: MIME type of `data`; matched case-insensitively.
+
+    Returns:
+        `(width, height)`, or `(0, 0)` when the media type is not
+        `image/png`, `image/jpeg` or `image/jpg`, or the header cannot be parsed.
+    """
+    kind = str(media_type or "").lower()
+
+    if kind == "image/png":
+        # Width and height are the big-endian 32-bit fields at offsets 16 and 20.
+        if len(data) < 24 or not data.startswith(b"\x89PNG\r\n\x1a\n"):
+            return 0, 0
+        try:
+            return int.from_bytes(data[16:20], "big"), int.from_bytes(data[20:24], "big")
+        except Exception:
+            return 0, 0
+
+    if kind not in {"image/jpeg", "image/jpg"}:
+        return 0, 0
+    if len(data) < 4 or data[0:2] != b"\xff\xd8":
+        return 0, 0
+
+    # Frame-header markers: 0xC0-0xCF except 0xC4, 0xC8 and 0xCC.
+    frame_markers = set(range(0xC0, 0xD0)) - {0xC4, 0xC8, 0xCC}
+    size = len(data)
+    pos = 2
+    while pos + 9 < size:
+        if data[pos] != 0xFF:
+            pos += 1
+            continue
+        marker = data[pos + 1]
+        pos += 2
+        # Skip any run of 0xFF fill bytes in front of the marker code.
+        while marker == 0xFF and pos < size:
+            marker = data[pos]
+            pos += 1
+        if marker in {0xD8, 0xD9}:
+            continue
+        if pos + 2 > size:
+            return 0, 0
+        seg_len = (data[pos] << 8) + data[pos + 1]
+        pos += 2
+        if seg_len < 2 or pos + seg_len - 2 > size:
+            return 0, 0
+        if marker in frame_markers and pos + 4 < size:
+            try:
+                # One precision byte precedes the 16-bit height and width.
+                height = (data[pos + 1] << 8) + data[pos + 2]
+                width = (data[pos + 3] << 8) + data[pos + 4]
+                return width, height
+            except Exception:
+                return 0, 0
+        pos += seg_len - 2
+    return 0, 0
+
+
+@lru_cache(maxsize=16)
+def _sns_img_roots(wxid_dir_str: str) -> tuple[str, ...]:
+    """Return the sorted `cache/*/Sns/Img` directories found under the wxid directory."""
+    cache_root = Path(str(wxid_dir_str or "").strip()) / "cache"
+    try:
+        month_dirs = [entry for entry in cache_root.iterdir() if entry.is_dir()]
+    except Exception:
+        # Missing or unreadable cache directory: nothing to list.
+        month_dirs = []
+
+    found: list[str] = []
+    for month_dir in month_dirs:
+        candidate = month_dir / "Sns" / "Img"
+        try:
+            usable = candidate.exists() and candidate.is_dir()
+        except Exception:
+            continue
+        if usable:
+            found.append(str(candidate))
+    return tuple(sorted(found))
+
+
+@lru_cache(maxsize=16)  # memoised per wxid dir: files added later are not seen until the entry is evicted
+def _sns_img_time_index(wxid_dir_str: str) -> tuple[list[float], list[str]]:
+    """Build an mtime-sorted index of the local SNS image cache as parallel `(mtimes, paths)` lists."""
+    wxid_dir = Path(str(wxid_dir_str or "").strip())
+    out: list[tuple[float, str]] = []
+
+    cache_root = wxid_dir / "cache"
+    try:
+        month_dirs = [p for p in cache_root.iterdir() if p.is_dir()]
+    except Exception:  # cache directory missing or unreadable -> empty index
+        month_dirs = []
+
+    for mdir in month_dirs:
+        img_root = mdir / "Sns" / "Img"
+        try:
+            if not (img_root.exists() and img_root.is_dir()):
+                continue
+        except Exception:
+            continue
+        try:
+            for sub in img_root.iterdir():  # files sit one level down, inside sub-directories of Img
+                if not sub.is_dir():
+                    continue
+                for f in sub.iterdir():
+                    try:
+                        if not f.is_file():
+                            continue
+                        st = f.stat()
+                        out.append((float(st.st_mtime), str(f)))
+                    except Exception:  # skip a single entry that cannot be inspected
+                        continue
+        except Exception:  # a listing failure abandons the rest of this Img root only
+            continue
+
+    out.sort(key=lambda x: x[0])  # ascending mtime, so the first list can be bisected
+    mtimes = [m for m, _p in out]
+    paths = [_p for _m, _p in out]
+    return mtimes, paths  # NOTE(review): these lists are shared by every cached call; do not mutate them
+
+
+def _normalize_hex32(value: Optional[str]) -> str:
+    """Return the first 32 hex digits of `value` in lowercase, or "" if it has fewer."""
+    text = str(value or "").strip().lower()
+    # Everything that is not a hex digit (dashes, whitespace, ...) is dropped
+    # before the length check, so an empty input falls through to "" as well.
+    hex_digits = re.sub(r"[^0-9a-f]", "", text) if text else ""
+    if len(hex_digits) >= 32:
+        return hex_digits[:32]
+    return ""
+
+
+def _sns_cache_key_from_path(p: Path) -> str:
+    """Rebuild the 32-hex cache key from a `cache/.../Sns/Img/<2hex>/<30hex>` path."""
+    try:
+        joined = "{}{}".format(p.parent.name, p.name)
+    except Exception:
+        return ""
+    return _normalize_hex32(joined)
+
+
+def _generate_sns_cache_key(tid: str, media_id: str, media_type: int = 2) -> str:
+    if tid and media_id:  # both ids are needed; otherwise fall through to ""
+        source = "{}_{}_{}".format(tid, media_id, media_type)  # "<tid>_<media_id>_<media_type>"
+        try:
+            return hashlib.md5(source.encode("utf-8")).hexdigest()  # 32 lowercase hex digits
+        except Exception:
+            pass
+    return ""
+
+
+def _resolve_sns_cached_image_path_by_cache_key(
+    *,
+    wxid_dir: Path,
+    cache_key: str,
+    create_time: int,
+) -> Optional[str]:
+    """Find `<Sns/Img root>/<key[:2]>/<key[2:]>` for a 32-hex cache key.
+
+    When several roots hold the file, the one whose mtime is closest to
+    `create_time` wins; with `create_time <= 0` the newest file wins.
+    Returns the path as a string, or None when nothing matches.
+    """
+    key32 = _normalize_hex32(cache_key)
+    roots = _sns_img_roots(str(wxid_dir)) if key32 else ()
+    if not roots:
+        return None
+
+    ranked: list[tuple[float, str]] = []
+    for root_str in roots:
+        try:
+            candidate = Path(root_str) / key32[:2] / key32[2:]
+            if candidate.exists() and candidate.is_file():
+                mtime = float(candidate.stat().st_mtime)
+                score = abs(mtime - float(create_time)) if create_time > 0 else -mtime
+                ranked.append((score, str(candidate)))
+        except Exception:
+            continue
+    # `min` keeps the first of equal scores, like a strict `<` comparison would.
+    return min(ranked, key=lambda item: item[0])[1] if ranked else None
+
+
+def _resolve_sns_cached_image_path_by_md5(
+    *,
+    wxid_dir: Path,
+    md5: str,
+    create_time: int,
+) -> Optional[str]:
+    """Locate a cached SNS image whose cache path is derived from `md5`.
+
+    The lookup is the same `<Sns/Img root>/<2 hex>/<30 hex>` probe that
+    `_resolve_sns_cached_image_path_by_cache_key` performs, so this function
+    delegates to it instead of keeping a second copy of the scan that could
+    drift out of sync.
+
+    Args:
+        wxid_dir: Account data directory that contains `cache/`.
+        md5: Media md5; only its first 32 hex digits are used.
+        create_time: Timestamp compared against file mtimes to pick the
+            closest file when the same key exists in more than one root;
+            `<= 0` prefers the newest file.
+
+    Returns:
+        The matching file path as a string, or None when `md5` is not a
+        valid 32-hex value or no root contains the file.
+    """
+    return _resolve_sns_cached_image_path_by_cache_key(
+        wxid_dir=wxid_dir,
+        cache_key=md5,
+        create_time=create_time,
+    )
+
+
+@lru_cache(maxsize=4096)  # NOTE(review): misses (None) are memoised too, so a file cached later stays unseen
+def _resolve_sns_cached_image_path(
+    *,
+    account_dir_str: str,
+    create_time: int,
+    width: int,
+    height: int,
+    idx: int,
+    total_size: int = 0,
+) -> Optional[str]:
+    """Best-effort match of a locally cached image using the post's and the media's metadata."""
+    total_size_i = int(total_size or 0)
+    must_match_size = width > 0 and height > 0
+    if (not must_match_size) and total_size_i <= 0:  # neither dimensions nor byte size: nothing to match on
+        return None
+
+    account_dir = Path(str(account_dir_str or "").strip())
+    if not account_dir.exists():
+        return None
+
+    wxid_dir = _resolve_account_wxid_dir(account_dir)
+    if not wxid_dir:
+        return None
+
+    mtimes, paths = _sns_img_time_index(str(wxid_dir))  # parallel lists, ascending by mtime
+    if not mtimes:
+        return None
+
+    create_time_i = int(create_time or 0)
+    if create_time_i > 0:
+        window = 72 * 3600  # accept files modified within 72 hours of the post time
+        lo = create_time_i - window
+        hi = create_time_i + window
+        l = bisect_left(mtimes, lo)
+        r = bisect_right(mtimes, hi)
+        if l >= r:  # nothing inside the window: fall back to the 800 newest files
+            l = max(0, len(mtimes) - 800)
+            r = len(mtimes)
+    else:  # no post time: only the 800 newest files are considered
+        l = max(0, len(mtimes) - 800)
+        r = len(mtimes)
+
+    candidates: list[tuple[float, str]] = []
+    for j in range(l, r):
+        try:
+            if create_time_i > 0:
+                candidates.append((abs(mtimes[j] - float(create_time_i)), paths[j]))  # distance to post time
+            else:
+                candidates.append((-mtimes[j], paths[j]))  # negated so that newer files sort first
+        except Exception:
+            continue
+    candidates.sort(key=lambda x: x[0])
+
+    matched: list[tuple[int, float, str]] = []
+    for diff, pstr in candidates[:2000]:  # cap how many files are read and decoded
+        try:
+            p = Path(pstr)
+            payload, media_type = _read_and_maybe_decrypt_media(p, account_dir)
+            if not payload or not str(media_type or "").startswith("image/"):
+                continue
+            if must_match_size:
+                w0, h0 = _image_size_from_bytes(payload, str(media_type or ""))
+                if (w0, h0) != (width, height):  # pixel dimensions must match exactly
+                    continue
+            size_diff = abs(len(payload) - total_size_i) if total_size_i > 0 else 0
+            matched.append((int(size_diff), float(diff), pstr))
+        except Exception:
+            continue
+
+    if not matched:
+        return None
+    if must_match_size:
+        matched.sort(key=lambda x: (x[0], x[1], x[2]))  # byte-size distance, then time distance, then path
+        if total_size_i > 0:
+            return matched[0][2]
+        idx0 = max(0, int(idx or 0))
+        return matched[idx0][2] if idx0 < len(matched) else None  # idx picks among same-dimension candidates
+    if total_size_i > 0:
+        matched.sort(key=lambda x: (x[0], x[1], x[2]))
+        return matched[0][2]
+    return None  # unreachable: this combination already returned at the top of the function
+
+
 def _resolve_sns_cached_video_path(
     wxid_dir: Path,
     post_id: str,
@@ -2386,22 +2686,109 @@ async def _try_fetch_and_decrypt_sns_remote(
     return resp
 
 
-@router.get("/api/sns/media", summary="获取朋友圈图片（下载解密优先）")
+@router.get("/api/sns/media", summary="获取朋友圈图片（本地缓存优先）")
 async def get_sns_media(
         account: Optional[str] = None,
+        create_time: int = 0,
+        width: int = 0,
+        height: int = 0,
+        total_size: int = 0,
+        idx: int = 0,
+        post_id: Optional[str] = None,
+        media_id: Optional[str] = None,
+        post_type: int = 1,
+        media_type: int = 2,
+        md5: Optional[str] = None,
         token: Optional[str] = None,
         key: Optional[str] = None,
         use_cache: int = 1,
         url: Optional[str] = None,
 ):
     account_dir = _resolve_account_dir(account)
+    wxid_dir = _resolve_account_wxid_dir(account_dir)
 
     try:
         use_cache_flag = bool(int(use_cache or 1))
     except Exception:
         use_cache_flag = True
 
-    # 0) Prefer WeFlow-style remote download + decrypt (accurate, avoids local cache mismatch).
+    if use_cache_flag:
+        if wxid_dir and post_id and media_id and int(post_type or 1) == 7:
+            try:
+                raw_key = f"{post_id}_{media_id}_4"
+                bkg_md5 = hashlib.md5(raw_key.encode("utf-8", errors="ignore")).hexdigest()
+                bkg_path = wxid_dir / "business" / "sns" / "bkg" / bkg_md5[:2] / bkg_md5
+                if bkg_path.exists() and bkg_path.is_file():
+                    return FileResponse(
+                        str(bkg_path),
+                        media_type="image/jpeg",
+                        headers={"Cache-Control": "public, max-age=31536000", "X-SNS-Source": "local-cover-cache"},
+                    )
+            except Exception:
+                pass
+
+        local_path = ""
+
+        # 1) 精确路径匹配：md5(tid_mediaId_type)。
+        if wxid_dir and post_id and media_id:
+            try:
+                key_post = _generate_sns_cache_key(str(post_id), str(media_id), int(post_type or 1))
+                local_path = _resolve_sns_cached_image_path_by_cache_key(
+                    wxid_dir=wxid_dir,
+                    cache_key=key_post,
+                    create_time=0,
+                ) or ""
+            except Exception:
+                local_path = ""
+
+            if (not local_path) and int(post_type or 1) != int(media_type or 2):
+                try:
+                    key_media = _generate_sns_cache_key(str(post_id), str(media_id), int(media_type or 2))
+                    local_path = _resolve_sns_cached_image_path_by_cache_key(
+                        wxid_dir=wxid_dir,
+                        cache_key=key_media,
+                        create_time=0,
+                    ) or ""
+                except Exception:
+                    local_path = ""
+
+        # 2) 使用 XML 或 URL 里携带的 md5 匹配缓存布局。
+        if (not local_path) and wxid_dir and _normalize_hex32(md5):
+            try:
+                local_path = _resolve_sns_cached_image_path_by_md5(
+                    wxid_dir=wxid_dir,
+                    md5=str(md5 or ""),
+                    create_time=int(create_time or 0),
+                ) or ""
+            except Exception:
+                local_path = ""
+
+        # 3) 旧版启发式匹配：发布时间、尺寸、文件大小和同尺寸组内序号。
+        if not local_path:
+            try:
+                local_path = _resolve_sns_cached_image_path(
+                    account_dir_str=str(account_dir),
+                    create_time=int(create_time or 0),
+                    width=int(width or 0),
+                    height=int(height or 0),
+                    idx=max(0, int(idx or 0)),
+                    total_size=int(total_size or 0),
+                ) or ""
+            except Exception:
+                local_path = ""
+
+        if local_path:
+            try:
+                payload, local_media_type = _read_and_maybe_decrypt_media(Path(local_path), account_dir)
+                if payload and str(local_media_type or "").startswith("image/"):
+                    resp = Response(content=payload, media_type=str(local_media_type or "image/jpeg"))
+                    resp.headers["Cache-Control"] = "public, max-age=31536000"
+                    resp.headers["X-SNS-Source"] = "local-cache"
+                    return resp
+            except Exception:
+                pass
+
+    # 4) 最后再走远程：WeFlow 风格下载、解密和远程缓存。
     remote_resp = await _try_fetch_and_decrypt_sns_remote(
         account_dir=account_dir,
         url=str(url or ""),
diff --git a/src/wechat_decrypt_tool/sns_export_service.py b/src/wechat_decrypt_tool/sns_export_service.py
index 44b5997..2f6f05a 100644
--- a/src/wechat_decrypt_tool/sns_export_service.py
+++ b/src/wechat_decrypt_tool/sns_export_service.py
@@ -982,24 +982,8 @@ class SnsExportManager:
             payload = b""
             mt = ""
 
-            # 0) Prefer WeFlow-style remote download+decrypt (accurate when keys are present).
-            if fixed:
-                should_cancel()
-                res = run_async(
-                    _try_fetch_and_decrypt_sns_image_remote(
-                        account_dir=account_dir,
-                        url=fixed,
-                        key=str(key or ""),
-                        token=str(token or ""),
-                        use_cache=use_cache,
-                    )
-                )
-                if res is not None:
-                    payload = bytes(res.payload or b"")
-                    mt = str(res.media_type or "")
-
-            # 1) Local cache fallback (only when cache is enabled; mirrors `/api/sns/media` semantics).
-            if (not payload) and use_cache:
+            # 0) 优先本地缓存；旧朋友圈的 CDN 资源可能已不可用或已降级。
+            if use_cache:
                 try:
                     post_type = int(post.get("type") or 1)
                 except Exception:
@@ -1086,6 +1070,22 @@ class SnsExportManager:
                         payload = b""
                         mt = ""
 
+            # 1) 本地未命中后，再走远程下载和解密。
+            if (not payload) and fixed:
+                should_cancel()
+                res = run_async(
+                    _try_fetch_and_decrypt_sns_image_remote(
+                        account_dir=account_dir,
+                        url=fixed,
+                        key=str(key or ""),
+                        token=str(token or ""),
+                        use_cache=use_cache,
+                    )
+                )
+                if res is not None:
+                    payload = bytes(res.payload or b"")
+                    mt = str(res.media_type or "")
+
             # 2) Last resort: proxy the raw URL (may return a Tencent placeholder image).
             if (not payload) and str(raw_url or "").startswith("http"):
                 try: