feat(chat): 聊天页支持日历定位/卡片解析/HTML导出分页

- 新增 /api/chat/messages/daily_counts 与 /api/chat/messages/anchor，用于月度热力图与按日/首条定位\n- messages/around 支持跨 message 分片定位，定位更稳定\n- 新增 /api/chat/chat_history/resolve 与 /api/chat/appmsg/resolve，合并转发/链接卡片可按 server_id 补全\n- 新增 /api/chat/media/favicon，并补齐 link 本地缩略图处理\n- HTML 导出支持分页加载（html_page_size），避免大聊天单文件卡顿\n- tests: 覆盖 heatmap/anchor、favicon 缓存、HTML 分页导出
2026-02-20 14:50:50 +08:00 · 2026-02-15 14:32:47 +08:00
parent 31d98abddf
commit bd44601611
10 changed files with 4375 additions and 302 deletions
--- a/src/wechat_decrypt_tool/chat_export_service.py
+++ b/src/wechat_decrypt_tool/chat_export_service.py
@@ -541,6 +541,11 @@ body { background: #EDEDED; }
 .wce-chat-title { font-size: 16px; font-weight: 500; color: #111827; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
 .wce-filter-select { font-size: 12px; padding: 6px 8px; border: 0; border-radius: 8px; background: transparent; color: #374151; }
 .wce-message-container { flex: 1; overflow: auto; padding: 16px; min-height: 0; }
+.wce-pager { display: flex; align-items: center; justify-content: center; gap: 12px; padding: 6px 0 12px; }
+.wce-pager-btn { font-size: 12px; padding: 6px 10px; border-radius: 8px; border: 1px solid #e5e7eb; background: #fff; color: #374151; cursor: pointer; }
+.wce-pager-btn:hover { background: #f9fafb; }
+.wce-pager-btn:disabled { opacity: 0.6; cursor: not-allowed; }
+.wce-pager-status { font-size: 12px; color: #6b7280; }

 /* Single session item (middle column). */
 .wce-session-item { display: flex; align-items: center; gap: 12px; padding: 0 12px; height: 80px; border-bottom: 1px solid #f3f4f6; background: #DEDEDE; text-decoration: none; color: inherit; }
@@ -838,6 +843,140 @@ _HTML_EXPORT_JS = r"""
    return obj
  }

+  const readPageMeta = () => {
+    const el = document.getElementById('wcePageMeta')
+    const obj = safeJsonParse(el ? el.textContent : '')
+    if (!obj || typeof obj !== 'object') return null
+    return obj
+  }
+
+  const initPagedMessageLoading = () => {
+    const meta = readPageMeta()
+    if (!meta) return
+
+    const totalPages = Number(meta.totalPages || 0)
+    if (!Number.isFinite(totalPages) || totalPages <= 1) return
+
+    const initialPage = Number(meta.initialPage || totalPages || 1)
+    const padWidth = Number(meta.padWidth || 0) || 0
+    const prefix = String(meta.pageFilePrefix || 'pages/page-')
+    const suffix = String(meta.pageFileSuffix || '.js')
+
+    const container = document.getElementById('messageContainer')
+    const list = document.getElementById('wceMessageList') || container
+    const pager = document.getElementById('wcePager')
+    const btn = document.getElementById('wceLoadPrevBtn')
+    const status = document.getElementById('wceLoadPrevStatus')
+    if (!container || !list || !pager || !btn) return
+
+    try { pager.style.display = '' } catch {}
+
+    const loaded = new Set()
+    loaded.add(initialPage)
+    let nextPage = initialPage - 1
+    let loading = false
+
+    const setStatus = (text) => {
+      try { if (status) status.textContent = String(text || '') } catch {}
+    }
+
+    const updateUi = (overrideText) => {
+      if (overrideText != null) {
+        setStatus(overrideText)
+        try { btn.disabled = false } catch {}
+        return
+      }
+      if (nextPage < 1) {
+        setStatus('已到底')
+        try { btn.disabled = true } catch {}
+        return
+      }
+      if (loading) {
+        setStatus('加载中...')
+        try { btn.disabled = true } catch {}
+        return
+      }
+      setStatus('点击加载更早消息')
+      try { btn.disabled = false } catch {}
+    }
+
+    const pageSrc = (n) => {
+      const num = padWidth > 0 ? String(n).padStart(padWidth, '0') : String(n)
+      return prefix + num + suffix
+    }
+
+    window.__WCE_PAGE_QUEUE__ = window.__WCE_PAGE_QUEUE__ || []
+    window.__WCE_PAGE_LOADED__ = (pageNo, html) => {
+      const n = Number(pageNo)
+      if (!Number.isFinite(n) || n < 1) return
+      if (loaded.has(n)) return
+      loaded.add(n)
+
+      try {
+        const prevH = container.scrollHeight
+        const prevTop = container.scrollTop
+        list.insertAdjacentHTML('afterbegin', String(html || ''))
+        const newH = container.scrollHeight
+        container.scrollTop = prevTop + (newH - prevH)
+      } catch {
+        try { list.insertAdjacentHTML('afterbegin', String(html || '')) } catch {}
+      }
+
+      loading = false
+      nextPage = n - 1
+      try { applyMessageTypeFilter() } catch {}
+      try { updateSessionMessageCount() } catch {}
+      updateUi()
+    }
+
+    // Flush any queued pages (should be rare, but keeps behavior robust).
+    try {
+      const q = window.__WCE_PAGE_QUEUE__
+      if (Array.isArray(q) && q.length) {
+        const items = q.slice(0)
+        q.length = 0
+        items.forEach((it) => {
+          try {
+            if (it && it.length >= 2) window.__WCE_PAGE_LOADED__(it[0], it[1])
+          } catch {}
+        })
+      }
+    } catch {}
+
+    const requestLoad = () => {
+      if (loading) return
+      if (nextPage < 1) return
+      const n = nextPage
+
+      loading = true
+      updateUi()
+
+      const s = document.createElement('script')
+      s.async = true
+      s.src = pageSrc(n)
+      s.onerror = () => {
+        loading = false
+        updateUi('加载失败，可重试')
+      }
+      try { document.body.appendChild(s) } catch {
+        loading = false
+        updateUi('加载失败，可重试')
+      }
+    }
+
+    btn.addEventListener('click', () => requestLoad())
+
+    let lastScrollAt = 0
+    container.addEventListener('scroll', () => {
+      const now = Date.now()
+      if (now - lastScrollAt < 200) return
+      lastScrollAt = now
+      if (container.scrollTop < 120) requestLoad()
+    })
+
+    updateUi()
+  }
+
  const isMaybeMd5 = (value) => /^[0-9a-f]{32}$/i.test(String(value || '').trim())
  const pickFirstMd5 = (...values) => {
    for (const v of values) {
@@ -926,28 +1065,90 @@ _HTML_EXPORT_JS = r"""

    const getText = (node, tag) => {
      try {
-        const el = node.getElementsByTagName(tag)?.[0]
+        if (!node) return ''
+        const els = Array.from(node.getElementsByTagName(tag) || [])
+        const direct = els.find((el) => el && el.parentNode === node)
+        const el = direct || els[0]
        return String(el?.textContent || '').trim()
      } catch {
        return ''
      }
    }

+    const getDirectChildXml = (node, tag) => {
+      try {
+        if (!node) return ''
+        const children = Array.from(node.children || [])
+        const el = children.find((c) => String(c?.tagName || '').toLowerCase() === String(tag || '').toLowerCase())
+        if (!el) return ''
+
+        const raw = String(el.textContent || '').trim()
+        if (raw && raw.startsWith('<') && raw.endsWith('>')) return raw
+
+        if (typeof XMLSerializer !== 'undefined') {
+          return new XMLSerializer().serializeToString(el)
+        }
+      } catch {}
+      return ''
+    }
+
+    const getAnyXml = (node, tag) => {
+      try {
+        if (!node) return ''
+        const els = Array.from(node.getElementsByTagName(tag) || [])
+        const direct = els.find((el) => el && el.parentNode === node)
+        const el = direct || els[0]
+        if (!el) return ''
+
+        const raw = String(el.textContent || '').trim()
+        if (raw && raw.startsWith('<') && raw.endsWith('>')) return raw
+        if (typeof XMLSerializer !== 'undefined') return new XMLSerializer().serializeToString(el)
+      } catch {}
+      return ''
+    }
+
+    const sameTag = (el, tag) => String(el?.tagName || '').toLowerCase() === String(tag || '').toLowerCase()
+
+    const closestAncestorByTag = (node, tag) => {
+      const lower = String(tag || '').toLowerCase()
+      let cur = node
+      while (cur) {
+        if (cur.nodeType === 1 && String(cur.tagName || '').toLowerCase() === lower) return cur
+        cur = cur.parentNode
+      }
+      return null
+    }
+
    const root = doc?.documentElement
    const isChatRoom = String(getText(root, 'isChatRoom') || '').trim() === '1'
    const title = getText(root, 'title')
    const desc = getText(root, 'desc') || getText(root, 'info')

-    const items = Array.from(doc.getElementsByTagName('dataitem') || [])
-    const parsed = items.map((node, idx) => {
-      const datatype = String(node.getAttribute('datatype') || '').trim()
-      const dataid = String(node.getAttribute('dataid') || '').trim() || String(idx)
+    const datalist = (() => {
+      try {
+        const all = Array.from(doc.getElementsByTagName('datalist') || [])
+        const top = root ? all.find((el) => closestAncestorByTag(el, 'recorditem') === root) : null
+        return top || all[0] || null
+      } catch {
+        return null
+      }
+    })()
+
+    const itemNodes = (() => {
+      if (datalist) return Array.from(datalist.children || []).filter((el) => sameTag(el, 'dataitem'))
+      return Array.from(root?.children || []).filter((el) => sameTag(el, 'dataitem'))
+    })()
+
+    const parsed = itemNodes.map((node, idx) => {
+      const datatype = String(node.getAttribute('datatype') || getText(node, 'datatype') || '').trim()
+      const dataid = String(node.getAttribute('dataid') || getText(node, 'dataid') || '').trim() || String(idx)

      const sourcename = getText(node, 'sourcename')
      const sourcetime = getText(node, 'sourcetime')
      const sourceheadurl = normalizeChatHistoryUrl(getText(node, 'sourceheadurl'))
      const datatitle = getText(node, 'datatitle')
      const datadesc = getText(node, 'datadesc')
+      const link = normalizeChatHistoryUrl(getText(node, 'link') || getText(node, 'dataurl') || getText(node, 'url'))
      const datafmt = getText(node, 'datafmt')
      const duration = getText(node, 'duration')

@@ -961,6 +1162,7 @@ _HTML_EXPORT_JS = r"""
      const fromnewmsgid = getText(node, 'fromnewmsgid')
      const srcMsgLocalid = getText(node, 'srcMsgLocalid')
      const srcMsgCreateTime = getText(node, 'srcMsgCreateTime')
+      const nestedRecordItem = getAnyXml(node, 'recorditem') || getDirectChildXml(node, 'recorditem') || getText(node, 'recorditem')

      let content = datatitle || datadesc
      if (!content) {
@@ -975,7 +1177,11 @@ _HTML_EXPORT_JS = r"""
      const imageFormats = new Set(['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'heic', 'heif'])

      let renderType = 'text'
-      if (datatype === '4' || String(duration || '').trim() || fmt === 'mp4') {
+      if (datatype === '17') {
+        renderType = 'chatHistory'
+      } else if (datatype === '5' || link) {
+        renderType = 'link'
+      } else if (datatype === '4' || String(duration || '').trim() || fmt === 'mp4') {
        renderType = 'video'
      } else if (datatype === '47' || datatype === '37') {
        renderType = 'emoji'
@@ -990,6 +1196,23 @@ _HTML_EXPORT_JS = r"""
        renderType = 'emoji'
      }

+      let outTitle = ''
+      let outUrl = ''
+      let recordItem = ''
+      if (renderType === 'chatHistory') {
+        outTitle = datatitle || content || '聊天记录'
+        content = datadesc || ''
+        recordItem = nestedRecordItem
+      } else if (renderType === 'link') {
+        outTitle = datatitle || content || ''
+        outUrl = link || externurl || ''
+        // datadesc can be an invisible filler; only keep as description when meaningful.
+        const cleanDesc = String(datadesc || '').replace(/[\\u3164\\u2800]/g, '').trim()
+        const cleanTitle = String(outTitle || '').replace(/[\\u3164\\u2800]/g, '').trim()
+        if (!cleanDesc || (cleanTitle && cleanDesc === cleanTitle)) content = ''
+        else content = String(datadesc || '').trim()
+      }
+
      return {
        id: dataid,
        datatype,
@@ -1009,6 +1232,9 @@ _HTML_EXPORT_JS = r"""
        srcMsgLocalid,
        srcMsgCreateTime,
        renderType,
+        title: outTitle,
+        recordItem,
+        url: outUrl,
        content
      }
    })
@@ -1028,15 +1254,64 @@ _HTML_EXPORT_JS = r"""
    if (!modal || !titleEl || !closeBtn || !emptyEl || !listEl) return

    const mediaIndex = readMediaIndex()
+    let historyStack = []
+    let currentState = null
+    let backBtn = null
+
+    const updateBackVisibility = () => {
+      if (!backBtn) return
+      const show = Array.isArray(historyStack) && historyStack.length > 0
+      try { backBtn.classList.toggle('hidden', !show) } catch {}
+    }
+
+    // Add a back button next to the title (created at runtime to avoid changing the HTML template).
+    try {
+      const header = titleEl.parentElement
+      if (header) {
+        const wrap = document.createElement('div')
+        wrap.className = 'flex items-center gap-2 min-w-0'
+
+        backBtn = document.createElement('button')
+        backBtn.type = 'button'
+        backBtn.className = 'p-2 rounded hover:bg-black/5 flex-shrink-0 hidden'
+        try { backBtn.setAttribute('aria-label', '返回') } catch {}
+        try { backBtn.setAttribute('title', '返回') } catch {}
+        backBtn.innerHTML = '<svg class="w-5 h-5 text-gray-700" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" /></svg>'
+
+        header.insertBefore(wrap, titleEl)
+        wrap.appendChild(backBtn)
+        wrap.appendChild(titleEl)
+      }
+    } catch {}

    const close = () => {
      try { modal.classList.add('hidden') } catch {}
      try { modal.style.display = 'none' } catch {}
      try { modal.setAttribute('aria-hidden', 'true') } catch {}
      try { document.body.style.overflow = '' } catch {}
-      try { titleEl.textContent = '合并消息' } catch {}
+      try { titleEl.textContent = '聊天记录' } catch {}
      try { listEl.textContent = '' } catch {}
      try { emptyEl.style.display = '' } catch {}
+      historyStack = []
+      currentState = null
+      updateBackVisibility()
+    }
+
+    const buildChatHistoryState = (payload) => {
+      const title = String(payload?.title || '聊天记录').trim() || '聊天记录'
+      const xml = String(payload?.recordItem || '').trim()
+      const parsed = parseChatHistoryRecord(xml)
+      const info = (parsed && parsed.info) ? parsed.info : { isChatRoom: false }
+      let records = (parsed && Array.isArray(parsed.items)) ? parsed.items : []
+
+      if (!records.length) {
+        const lines = Array.isArray(payload?.fallbackLines)
+          ? payload.fallbackLines
+          : String(payload?.content || '').trim().split(/\r?\n/).map((x) => String(x || '').trim()).filter(Boolean)
+        records = lines.map((line, idx) => ({ id: String(idx), renderType: 'text', content: line, sourcename: '', sourcetime: '' }))
+      }
+
+      return { title, info, records }
    }

    const renderRecordRow = (rec, info) => {
@@ -1102,7 +1377,123 @@ _HTML_EXPORT_JS = r"""
      const serverId = String(rec?.fromnewmsgid || '').trim()
      const serverMd5 = resolveServerMd5(mediaIndex, serverId)

-      if (rt === 'video') {
+      if (rt === 'chatHistory') {
+        const card = document.createElement('div')
+        card.className = 'wechat-chat-history-card wechat-special-card msg-radius'
+
+        const chBody = document.createElement('div')
+        chBody.className = 'wechat-chat-history-body'
+
+        const chTitle = document.createElement('div')
+        chTitle.className = 'wechat-chat-history-title'
+        chTitle.textContent = String(rec?.title || '聊天记录')
+        chBody.appendChild(chTitle)
+
+        const raw = String(rec?.content || '').trim()
+        const lines = raw ? raw.split(/\r?\n/).map((x) => String(x || '').trim()).filter(Boolean).slice(0, 4) : []
+        if (lines.length) {
+          const preview = document.createElement('div')
+          preview.className = 'wechat-chat-history-preview'
+          for (const line of lines) {
+            const el = document.createElement('div')
+            el.className = 'wechat-chat-history-line'
+            el.textContent = line
+            preview.appendChild(el)
+          }
+          chBody.appendChild(preview)
+        }
+
+        card.appendChild(chBody)
+
+        const bottom = document.createElement('div')
+        bottom.className = 'wechat-chat-history-bottom'
+        const label = document.createElement('span')
+        label.textContent = '聊天记录'
+        bottom.appendChild(label)
+        card.appendChild(bottom)
+
+        const nestedXml = String(rec?.recordItem || '').trim()
+        if (nestedXml) {
+          card.classList.add('cursor-pointer')
+          card.addEventListener('click', (ev) => {
+            try { ev.preventDefault() } catch {}
+            try { ev.stopPropagation() } catch {}
+            openNestedChatHistory(rec)
+          })
+        }
+
+        body.appendChild(card)
+      } else if (rt === 'link') {
+        const href = normalizeChatHistoryUrl(rec?.url) || normalizeChatHistoryUrl(rec?.externurl)
+        const heading = String(rec?.title || '').trim() || content || href || '链接'
+        const desc = String(rec?.content || '').trim()
+
+        const thumbMd5 = pickFirstMd5(rec?.fullmd5, rec?.thumbfullmd5, rec?.md5)
+        let previewUrl = resolveMd5Any(mediaIndex, thumbMd5)
+        if (!previewUrl && serverMd5) previewUrl = resolveMd5Any(mediaIndex, serverMd5)
+        if (!previewUrl) previewUrl = resolveRemoteAny(mediaIndex, rec?.externurl, rec?.cdnurlstring, rec?.encrypturlstring)
+
+        const card = document.createElement(href ? 'a' : 'div')
+        card.className = 'wechat-link-card wechat-special-card msg-radius cursor-pointer'
+        if (href) {
+          card.href = href
+          card.target = '_blank'
+          card.rel = 'noreferrer noopener'
+        }
+        try { card.style.textDecoration = 'none' } catch {}
+        try { card.style.outline = 'none' } catch {}
+
+        const linkContent = document.createElement('div')
+        linkContent.className = 'wechat-link-content'
+
+        const linkInfo = document.createElement('div')
+        linkInfo.className = 'wechat-link-info'
+        const titleEl = document.createElement('div')
+        titleEl.className = 'wechat-link-title'
+        titleEl.textContent = heading
+        linkInfo.appendChild(titleEl)
+        if (desc) {
+          const descEl = document.createElement('div')
+          descEl.className = 'wechat-link-desc'
+          descEl.textContent = desc
+          linkInfo.appendChild(descEl)
+        }
+        linkContent.appendChild(linkInfo)
+
+        if (previewUrl) {
+          const thumb = document.createElement('div')
+          thumb.className = 'wechat-link-thumb'
+          const img = document.createElement('img')
+          img.src = previewUrl
+          img.alt = heading || '链接预览'
+          img.className = 'wechat-link-thumb-img'
+          try { img.referrerPolicy = 'no-referrer' } catch {}
+          thumb.appendChild(img)
+          linkContent.appendChild(thumb)
+        }
+
+        card.appendChild(linkContent)
+
+        const fromRow = document.createElement('div')
+        fromRow.className = 'wechat-link-from'
+        const fromText = (() => {
+          const f0 = String(rec?.from || '').trim()
+          if (f0) return f0
+          try { return href ? (new URL(href).hostname || '') : '' } catch { return '' }
+        })()
+        const fromAvatarText = fromText ? (Array.from(fromText)[0] || '') : ''
+        const fromAvatar = document.createElement('div')
+        fromAvatar.className = 'wechat-link-from-avatar'
+        fromAvatar.textContent = fromAvatarText || '\u200B'
+        const fromName = document.createElement('div')
+        fromName.className = 'wechat-link-from-name'
+        fromName.textContent = fromText || '\u200B'
+        fromRow.appendChild(fromAvatar)
+        fromRow.appendChild(fromName)
+        card.appendChild(fromRow)
+
+        body.appendChild(card)
+      } else if (rt === 'video') {
        const videoMd5 = pickFirstMd5(rec?.fullmd5, rec?.md5)
        const thumbMd5 = pickFirstMd5(rec?.thumbfullmd5) || videoMd5
        let videoUrl = resolveMd5Any(mediaIndex, videoMd5)
@@ -1202,20 +1593,11 @@ _HTML_EXPORT_JS = r"""
      return row
    }

-    const openFromCard = (card) => {
-      const title = String(card?.getAttribute('data-title') || '合并消息').trim() || '合并消息'
-      const b64 = String(card?.getAttribute('data-record-item-b64') || '').trim()
-      const xml = decodeBase64Utf8(b64)
-      const parsed = parseChatHistoryRecord(xml)
-      const info = (parsed && parsed.info) ? parsed.info : { isChatRoom: false }
-      let records = (parsed && Array.isArray(parsed.items)) ? parsed.items : []
-
-      if (!records.length) {
-        const lines = Array.from(card.querySelectorAll('.wechat-chat-history-line') || [])
-          .map((el) => String(el?.textContent || '').trim())
-          .filter(Boolean)
-        records = lines.map((line, idx) => ({ id: String(idx), renderType: 'text', content: line, sourcename: '', sourcetime: '' }))
-      }
+    const applyChatHistoryState = (state) => {
+      currentState = state
+      const title = String(state?.title || '聊天记录').trim() || '聊天记录'
+      const info = state?.info || { isChatRoom: false }
+      const records = Array.isArray(state?.records) ? state.records : []

      try { titleEl.textContent = title } catch {}
      try { listEl.textContent = '' } catch {}
@@ -1231,6 +1613,45 @@ _HTML_EXPORT_JS = r"""
        }
      }

+      updateBackVisibility()
+    }
+
+    const openNestedChatHistory = (rec) => {
+      const xml = String(rec?.recordItem || '').trim()
+      if (!xml) return
+      if (currentState) {
+        historyStack = [...historyStack, currentState]
+      }
+      const state = buildChatHistoryState({
+        title: String(rec?.title || '聊天记录'),
+        recordItem: xml,
+        content: String(rec?.content || ''),
+      })
+      applyChatHistoryState(state)
+    }
+
+    if (backBtn) {
+      backBtn.addEventListener('click', (ev) => {
+        try { ev.preventDefault() } catch {}
+        if (!Array.isArray(historyStack) || !historyStack.length) return
+        const prev = historyStack[historyStack.length - 1]
+        historyStack = historyStack.slice(0, -1)
+        applyChatHistoryState(prev)
+      })
+    }
+
+    const openFromCard = (card) => {
+      const title = String(card?.getAttribute('data-title') || '聊天记录').trim() || '聊天记录'
+      const b64 = String(card?.getAttribute('data-record-item-b64') || '').trim()
+      const xml = decodeBase64Utf8(b64)
+      const lines = Array.from(card.querySelectorAll('.wechat-chat-history-line') || [])
+        .map((el) => String(el?.textContent || '').trim())
+        .filter(Boolean)
+
+      historyStack = []
+      const state = buildChatHistoryState({ title, recordItem: xml, fallbackLines: lines })
+      applyChatHistoryState(state)
+
      try { modal.classList.remove('hidden') } catch {}
      try { modal.style.display = 'flex' } catch {}
      try { modal.setAttribute('aria-hidden', 'false') } catch {}
@@ -1269,6 +1690,7 @@ _HTML_EXPORT_JS = r"""
    initSessionSearch()
    initVoicePlayback()
    initChatHistoryModal()
+    initPagedMessageLoading()

    const select = document.getElementById('messageTypeFilter')
    if (select) {
@@ -1469,6 +1891,7 @@ class ChatExportManager:
        output_dir: Optional[str],
        allow_process_key_extract: bool,
        download_remote_media: bool,
+        html_page_size: int = 1000,
        privacy_mode: bool,
        file_name: Optional[str],
    ) -> ExportJob:
@@ -1493,6 +1916,7 @@ class ChatExportManager:
                "outputDir": str(output_dir or "").strip(),
                "allowProcessKeyExtract": bool(allow_process_key_extract),
                "downloadRemoteMedia": bool(download_remote_media),
+                "htmlPageSize": int(html_page_size) if int(html_page_size or 0) > 0 else int(html_page_size or 0),
                "privacyMode": bool(privacy_mode),
                "fileName": str(file_name or "").strip(),
            },
@@ -1544,6 +1968,12 @@ class ChatExportManager:
        allow_process_key_extract = bool(opts.get("allowProcessKeyExtract"))
        download_remote_media = bool(opts.get("downloadRemoteMedia"))
        privacy_mode = bool(opts.get("privacyMode"))
+        try:
+            html_page_size = int(opts.get("htmlPageSize") or 1000)
+        except Exception:
+            html_page_size = 1000
+        if html_page_size < 0:
+            html_page_size = 0

        media_kinds_raw = opts.get("mediaKinds") or []
        media_kinds: list[MediaKind] = []
@@ -1898,6 +2328,7 @@ class ChatExportManager:
                            session_items=session_items,
                            download_remote_media=remote_download_enabled,
                            remote_written=remote_written,
+                            html_page_size=html_page_size,
                            start_time=st,
                            end_time=et,
                            want_types=want_types,
@@ -2045,6 +2476,7 @@ class ChatExportManager:
                        "mediaKinds": media_kinds,
                        "allowProcessKeyExtract": allow_process_key_extract,
                        "downloadRemoteMedia": bool(download_remote_media),
+                        "htmlPageSize": int(html_page_size) if export_format == "html" else None,
                        "privacyMode": privacy_mode,
                    },
                    "stats": {
@@ -3110,6 +3542,7 @@ def _write_conversation_html(
    session_items: list[dict[str, Any]],
    download_remote_media: bool,
    remote_written: dict[str, str],
+    html_page_size: int = 1000,
    start_time: Optional[int],
    end_time: Optional[int],
    want_types: Optional[set[str]],
@@ -3499,7 +3932,7 @@ def _write_conversation_html(
        ("emoji", "表情"),
        ("video", "视频"),
        ("voice", "语音"),
-        ("chatHistory", "合并消息"),
+        ("chatHistory", "聊天记录"),
        ("transfer", "转账"),
        ("redPacket", "红包"),
        ("file", "文件"),
@@ -3509,10 +3942,46 @@ def _write_conversation_html(
        ("voip", "通话"),
    ]

+    page_size = 0
+    try:
+        page_size = int(html_page_size or 0)
+    except Exception:
+        page_size = 0
+    if page_size < 0:
+        page_size = 0
+
    # NOTE: write to a temp file first to avoid zip interleaving writes.
    with tempfile.TemporaryDirectory(prefix="wechat_chat_export_") as tmp_dir:
        tmp_path = Path(tmp_dir) / "messages.html"
-        with open(tmp_path, "w", encoding="utf-8", newline="\n") as tw:
+        pages_frag_dir = Path(tmp_dir) / "pages_fragments"
+        page_frag_paths: list[Path] = []
+        paged_old_page_paths: list[Path] = []
+        paged_total_pages = 1
+        paged_pad_width = 4
+        with open(tmp_path, "w", encoding="utf-8", newline="\n") as hw:
+            class _WriteProxy:
+                def __init__(self, default_target):
+                    self._default = default_target
+                    self._target = default_target
+
+                def set_target(self, target) -> None:
+                    self._target = target or self._default
+
+                def write(self, s: str) -> Any:
+                    return self._target.write(s)
+
+                def flush(self) -> None:
+                    try:
+                        if self._target is not self._default:
+                            self._target.flush()
+                    except Exception:
+                        pass
+                    try:
+                        self._default.flush()
+                    except Exception:
+                        pass
+
+            tw = _WriteProxy(hw)
            tw.write("<!doctype html>\n")
            tw.write('<html lang="zh-CN">\n')
            tw.write("<head>\n")
@@ -3688,6 +4157,55 @@ def _write_conversation_html(
            tw.write("        </div>\n")

            tw.write('        <div id="messageContainer" class="wce-message-container flex-1 overflow-y-auto p-4 min-h-0">\n')
+            tw.write('          <div id="wcePager" class="wce-pager" style="display:none">\n')
+            tw.write('            <button id="wceLoadPrevBtn" type="button" class="wce-pager-btn">加载更早消息</button>\n')
+            tw.write('            <span id="wceLoadPrevStatus" class="wce-pager-status"></span>\n')
+            tw.write("          </div>\n")
+            tw.write('          <div id="wceMessageList">\n')
+
+            page_fp = None
+            page_fp_path: Optional[Path] = None
+            page_no = 1
+            page_msg_count = 0
+
+            def _open_page_fp() -> Any:
+                nonlocal page_fp, page_fp_path
+                pages_frag_dir.mkdir(parents=True, exist_ok=True)
+                page_fp_path = pages_frag_dir / f"page_{page_no}.htmlfrag"
+                page_fp = open(page_fp_path, "w", encoding="utf-8", newline="\n")
+                return page_fp
+
+            def _close_page_fp() -> None:
+                nonlocal page_fp, page_fp_path
+                if page_fp is None:
+                    page_fp_path = None
+                    return
+                try:
+                    page_fp.flush()
+                except Exception:
+                    pass
+                try:
+                    page_fp.close()
+                except Exception:
+                    pass
+                if page_fp_path is not None:
+                    page_frag_paths.append(page_fp_path)
+                page_fp = None
+                page_fp_path = None
+                tw.set_target(hw)
+
+            def _mark_exported() -> None:
+                nonlocal exported, page_no, page_msg_count
+                exported += 1
+                with lock:
+                    job.progress.messages_exported += 1
+                    job.progress.current_conversation_messages_exported = exported
+                if page_size > 0:
+                    page_msg_count += 1
+                    if page_msg_count >= page_size:
+                        _close_page_fp()
+                        page_no += 1
+                        page_msg_count = 0

            sender_alias_map: dict[str, int] = {}
            prev_ts = 0
@@ -3755,6 +4273,11 @@ def _write_conversation_html(
                if ts and ((prev_ts == 0) or (abs(ts - prev_ts) >= 300)):
                    show_divider = True

+                if page_size > 0:
+                    if page_fp is None:
+                        _open_page_fp()
+                    tw.set_target(page_fp)
+
                if show_divider:
                    divider_text = _format_session_time(ts)
                    if divider_text:
@@ -3770,10 +4293,7 @@ def _write_conversation_html(
                    tw.write(f'              <div class="px-3 py-1 text-xs text-[#9e9e9e]">{esc_text(msg.get("content") or "")}</div>\n')
                    tw.write("            </div>\n")
                    tw.write("          </div>\n")
-                    exported += 1
-                    with lock:
-                        job.progress.messages_exported += 1
-                        job.progress.current_conversation_messages_exported = exported
+                    _mark_exported()
                    if ts:
                        prev_ts = ts
                    continue
@@ -4186,7 +4706,7 @@ def _write_conversation_html(

                        tw.write("                  </div>\n")
                elif rt == "chatHistory":
-                    title = str(msg.get("title") or "").strip() or "合并消息"
+                    title = str(msg.get("title") or "").strip() or "聊天记录"
                    record_item = str(msg.get("recordItem") or "").strip()
                    record_item_b64 = ""
                    if record_item:
@@ -4260,7 +4780,7 @@ def _write_conversation_html(
                            tw.write(f'                        <div class="wechat-chat-history-line">{esc_text(line)}</div>\n')
                        tw.write("                      </div>\n")
                    tw.write("                    </div>\n")
-                    tw.write('                    <div class="wechat-chat-history-bottom"><span>合并消息</span></div>\n')
+                    tw.write('                    <div class="wechat-chat-history-bottom"><span>聊天记录</span></div>\n')
                    tw.write("                  </div>\n")
                elif rt == "transfer":
                    received = is_transfer_received(msg)
@@ -4328,17 +4848,55 @@ def _write_conversation_html(
                tw.write("            </div>\n")
                tw.write("          </div>\n")

-                exported += 1
-                with lock:
-                    job.progress.messages_exported += 1
-                    job.progress.current_conversation_messages_exported = exported
+                _mark_exported()
                if ts:
                    prev_ts = ts

                if scanned % 500 == 0 and job.cancel_requested:
                    raise _JobCancelled()

+            if page_size > 0:
+                _close_page_fp()
+                paged_total_pages = max(1, len(page_frag_paths))
+                paged_pad_width = max(4, len(str(paged_total_pages)))
+                if page_frag_paths:
+                    paged_old_page_paths = list(page_frag_paths[:-1])
+                    tw.set_target(hw)
+                    try:
+                        tw.write(page_frag_paths[-1].read_text(encoding="utf-8"))
+                    except Exception:
+                        try:
+                            tw.write(page_frag_paths[-1].read_text(encoding="utf-8", errors="ignore"))
+                        except Exception:
+                            pass
+                else:
+                    paged_old_page_paths = []
+                    tw.set_target(hw)
+
+            # Close message list + container
+            tw.set_target(hw)
+            tw.write("          </div>\n")
            tw.write("        </div>\n")
+
+            if page_size > 0 and paged_total_pages > 1:
+                page_meta = {
+                    "schemaVersion": 1,
+                    "pageSize": int(page_size),
+                    "totalPages": int(paged_total_pages),
+                    "initialPage": int(paged_total_pages),
+                    "totalMessages": int(exported),
+                    "padWidth": int(paged_pad_width),
+                    "pageFilePrefix": "pages/page-",
+                    "pageFileSuffix": ".js",
+                    "inlinedPages": [int(paged_total_pages)],
+                }
+                try:
+                    page_meta_payload = json.dumps(page_meta, ensure_ascii=False)
+                except Exception:
+                    page_meta_payload = "{}"
+                page_meta_payload = page_meta_payload.replace("</", "<\\/")
+                tw.write(f'<script type="application/json" id="wcePageMeta">{page_meta_payload}</script>\n')
+
            tw.write("      </div>\n")
            tw.write("    </div>\n")
            tw.write("  </div>\n")
@@ -4357,7 +4915,7 @@ def _write_conversation_html(
            )
            tw.write('  <div class="w-[92vw] max-w-[560px] max-h-[80vh] bg-white rounded-xl shadow-xl overflow-hidden flex flex-col" role="dialog" aria-modal="true">\n')
            tw.write('    <div class="px-4 py-3 bg-neutral-100 border-b border-gray-200 flex items-center justify-between">\n')
-            tw.write('      <div id="chatHistoryModalTitle" class="text-sm text-[#161616] truncate">合并消息</div>\n')
+            tw.write('      <div id="chatHistoryModalTitle" class="text-sm text-[#161616] truncate">聊天记录</div>\n')
            tw.write('      <button type="button" id="chatHistoryModalClose" class="p-2 rounded hover:bg-black/5" aria-label="关闭" title="关闭">\n')
            tw.write('        <svg class="w-5 h-5 text-gray-700" fill="none" stroke="currentColor" viewBox="0 0 24 24">\n')
            tw.write('          <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/>\n')
@@ -4365,7 +4923,7 @@ def _write_conversation_html(
            tw.write("      </button>\n")
            tw.write("    </div>\n")
            tw.write('    <div class="flex-1 overflow-auto bg-white">\n')
-            tw.write('      <div id="chatHistoryModalEmpty" class="text-sm text-gray-500 text-center py-10">没有可显示的合并消息</div>\n')
+            tw.write('      <div id="chatHistoryModalEmpty" class="text-sm text-gray-500 text-center py-10">没有可显示的聊天记录</div>\n')
            tw.write('      <div id="chatHistoryModalList"></div>\n')
            tw.write("    </div>\n")
            tw.write("  </div>\n")
@@ -4377,6 +4935,39 @@ def _write_conversation_html(

        zf.write(str(tmp_path), arcname)

+        if page_size > 0 and paged_old_page_paths:
+            for page_no, frag_path in enumerate(paged_old_page_paths, start=1):
+                try:
+                    frag_text = frag_path.read_text(encoding="utf-8")
+                except Exception:
+                    try:
+                        frag_text = frag_path.read_text(encoding="utf-8", errors="ignore")
+                    except Exception:
+                        frag_text = ""
+
+                try:
+                    frag_json = json.dumps(frag_text, ensure_ascii=False)
+                except Exception:
+                    frag_json = json.dumps("", ensure_ascii=False)
+
+                num = str(page_no).zfill(int(paged_pad_width or 4))
+                arc_js = f"{conv_dir}/pages/page-{num}.js"
+                js_payload = (
+                    "(() => {\n"
+                    f"  const pageNo = {int(page_no)};\n"
+                    f"  const html = {frag_json};\n"
+                    "  try {\n"
+                    "    const fn = window.__WCE_PAGE_LOADED__;\n"
+                    "    if (typeof fn === 'function') fn(pageNo, html);\n"
+                    "    else {\n"
+                    "      const q = (window.__WCE_PAGE_QUEUE__ = window.__WCE_PAGE_QUEUE__ || []);\n"
+                    "      q.push([pageNo, html]);\n"
+                    "    }\n"
+                    "  } catch {}\n"
+                    "})();\n"
+                )
+                zf.writestr(arc_js, js_payload)
+
    return exported


--- a/src/wechat_decrypt_tool/routers/chat.py
+++ b/src/wechat_decrypt_tool/routers/chat.py
--- a/src/wechat_decrypt_tool/routers/chat_export.py
+++ b/src/wechat_decrypt_tool/routers/chat_export.py
@@ -59,6 +59,10 @@ class ChatExportCreateRequest(BaseModel):
        False,
        description="HTML 导出时允许联网下载链接/引用缩略图等远程媒体（提高离线完整性）",
    )
+    html_page_size: int = Field(
+        1000,
+        description="HTML 导出分页大小（每页消息数）；<=0 表示禁用分页（单文件，打开大聊天可能很卡）",
+    )
    privacy_mode: bool = Field(
        False,
        description="隐私模式导出：隐藏会话/用户名/内容，不打包头像与媒体",
@@ -83,6 +87,7 @@ async def create_chat_export(req: ChatExportCreateRequest):
        output_dir=req.output_dir,
        allow_process_key_extract=req.allow_process_key_extract,
        download_remote_media=req.download_remote_media,
+        html_page_size=req.html_page_size,
        privacy_mode=req.privacy_mode,
        file_name=req.file_name,
    )
--- a/src/wechat_decrypt_tool/routers/chat_media.py
+++ b/src/wechat_decrypt_tool/routers/chat_media.py
@@ -1019,6 +1019,171 @@ async def proxy_image(url: str):
    return resp


+def _origin_favicon_url(page_url: str) -> str:
+    """Best-effort favicon URL for a given page URL (origin + /favicon.ico)."""
+    u = str(page_url or "").strip()
+    if not u:
+        return ""
+    try:
+        p = urlparse(u)
+    except Exception:
+        return ""
+    if not p.scheme or not p.netloc:
+        return ""
+    return f"{p.scheme}://{p.netloc}/favicon.ico"
+
+
+def _resolve_final_url_for_favicon(page_url: str) -> str:
+    """Resolve final URL for redirects (used for favicon host inference)."""
+    u = str(page_url or "").strip()
+    if not u:
+        return ""
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    }
+
+    # Prefer HEAD (no body). Some hosts reject HEAD; fall back to GET+stream.
+    try:
+        r = requests.head(u, headers=headers, timeout=10, allow_redirects=True)
+        try:
+            final = str(getattr(r, "url", "") or "").strip()
+            return final or u
+        finally:
+            try:
+                r.close()
+            except Exception:
+                pass
+    except Exception:
+        pass
+
+    try:
+        r = requests.get(u, headers=headers, timeout=10, allow_redirects=True, stream=True)
+        try:
+            final = str(getattr(r, "url", "") or "").strip()
+            return final or u
+        finally:
+            try:
+                r.close()
+            except Exception:
+                pass
+    except Exception:
+        return u
+
+
+@router.get("/api/chat/media/favicon", summary="获取网站 favicon（用于链接卡片来源头像）")
+async def get_favicon(url: str):
+    page_url = html.unescape(str(url or "")).strip()
+    if not page_url:
+        raise HTTPException(status_code=400, detail="Missing url.")
+    if not _is_safe_http_url(page_url):
+        raise HTTPException(status_code=400, detail="Invalid url (only public http/https allowed).")
+
+    # Resolve redirects first (e.g. b23.tv -> www.bilibili.com), so cached favicons are hit early.
+    final_url = _resolve_final_url_for_favicon(page_url)
+    candidates: list[str] = []
+    for u in (final_url, page_url):
+        fav = _origin_favicon_url(u)
+        if fav and fav not in candidates:
+            candidates.append(fav)
+
+    proxy_account = "_favicon"
+    max_bytes = 512 * 1024  # favicons should be small; protect against huge downloads.
+
+    for cand in candidates:
+        if not _is_safe_http_url(cand):
+            continue
+        source_url = normalize_avatar_source_url(cand)
+
+        cache_entry = get_avatar_cache_url_entry(proxy_account, source_url) if is_avatar_cache_enabled() else None
+        cache_file = avatar_cache_entry_file_exists(proxy_account, cache_entry)
+        if cache_entry and cache_file and avatar_cache_entry_is_fresh(cache_entry):
+            logger.info(f"[avatar_cache_hit] kind=favicon account={proxy_account} url={source_url}")
+            touch_avatar_cache_entry(proxy_account, cache_key_for_avatar_url(source_url))
+            headers = build_avatar_cache_response_headers(cache_entry)
+            return FileResponse(
+                str(cache_file),
+                media_type=str(cache_entry.get("media_type") or "application/octet-stream"),
+                headers=headers,
+            )
+
+        # Download favicon bytes (best-effort)
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
+            "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
+        }
+        r = None
+        try:
+            r = requests.get(source_url, headers=headers, timeout=20, stream=True, allow_redirects=True)
+            if int(getattr(r, "status_code", 0) or 0) != 200:
+                continue
+
+            ct = str((getattr(r, "headers", {}) or {}).get("Content-Type") or "").strip()
+            try:
+                cl = int((getattr(r, "headers", {}) or {}).get("content-length") or 0)
+            except Exception:
+                cl = 0
+            if cl and cl > max_bytes:
+                raise HTTPException(status_code=413, detail="Remote favicon too large.")
+
+            chunks: list[bytes] = []
+            total = 0
+            for chunk in r.iter_content(chunk_size=64 * 1024):
+                if not chunk:
+                    continue
+                chunks.append(chunk)
+                total += len(chunk)
+                if total > max_bytes:
+                    raise HTTPException(status_code=413, detail="Remote favicon too large.")
+            data = b"".join(chunks)
+        except HTTPException:
+            raise
+        except Exception:
+            continue
+        finally:
+            if r is not None:
+                try:
+                    r.close()
+                except Exception:
+                    pass
+
+        if not data:
+            continue
+
+        payload, media_type, _ext = _detect_media_type_and_ext(data)
+        if media_type == "application/octet-stream" and ct:
+            try:
+                mt = ct.split(";")[0].strip()
+                if mt.startswith("image/"):
+                    media_type = mt
+            except Exception:
+                pass
+
+        if not str(media_type or "").startswith("image/"):
+            continue
+
+        if is_avatar_cache_enabled():
+            entry, out_path = write_avatar_cache_payload(
+                proxy_account,
+                source_kind="url",
+                source_url=source_url,
+                payload=payload,
+                media_type=media_type,
+                ttl_seconds=AVATAR_CACHE_TTL_SECONDS,
+            )
+            if entry and out_path:
+                logger.info(f"[avatar_cache_download] kind=favicon account={proxy_account} url={source_url}")
+                headers = build_avatar_cache_response_headers(entry)
+                return FileResponse(str(out_path), media_type=media_type, headers=headers)
+
+        resp = Response(content=payload, media_type=media_type)
+        resp.headers["Cache-Control"] = f"public, max-age={AVATAR_CACHE_TTL_SECONDS}"
+        return resp
+
+    raise HTTPException(status_code=404, detail="favicon not found.")
+
+
@router.post("/api/chat/media/emoji/download", summary="下载表情消息资源到本地 resource")
 async def download_chat_emoji(req: EmojiDownloadRequest):
    md5 = str(req.md5 or "").strip().lower()