diff --git a/src/wechat_decrypt_tool/perf_trace.py b/src/wechat_decrypt_tool/perf_trace.py new file mode 100644 index 0000000..7a1a563 --- /dev/null +++ b/src/wechat_decrypt_tool/perf_trace.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import json +import threading +import time +from typing import Any, Callable + + +def create_perf_trace(logger: Any, category: str, **base_fields: Any) -> tuple[str, Callable[[str], None]]: + trace_id = f"{category}-{int(time.time() * 1000)}-{threading.get_ident()}" + started_at = time.perf_counter() + last_at = started_at + + def log(phase: str, **fields: Any) -> None: + nonlocal last_at + now = time.perf_counter() + payload = { + **base_fields, + **fields, + "elapsedMs": round((now - started_at) * 1000.0, 1), + "deltaMs": round((now - last_at) * 1000.0, 1), + } + last_at = now + try: + payload_text = json.dumps(payload, ensure_ascii=False, default=str) + except Exception: + payload_text = str(payload) + logger.info("[%s] %s %s %s", trace_id, category, phase, payload_text) + + return trace_id, log diff --git a/src/wechat_decrypt_tool/routers/chat.py b/src/wechat_decrypt_tool/routers/chat.py index ae117af..25da249 100644 --- a/src/wechat_decrypt_tool/routers/chat.py +++ b/src/wechat_decrypt_tool/routers/chat.py @@ -73,6 +73,7 @@ from ..app_paths import get_output_dir from ..database_filters import list_countable_database_names from ..key_store import remove_account_keys_from_store from ..path_fix import PathFixRoute +from ..perf_trace import create_perf_trace from ..session_last_message import ( build_session_last_message_table, get_session_last_message_status, @@ -3998,6 +3999,17 @@ def list_chat_sessions( contact_db_path = account_dir / "contact.db" head_image_db_path = account_dir / "head_image.db" base_url = str(request.base_url).rstrip("/") + _trace_id, trace = create_perf_trace( + logger, + "chat.sessions", + account=account_dir.name, + source=source_norm or "default", + limit=int(limit), + includeHidden=bool(include_hidden), + includeOfficial=bool(include_official), + preview=str(preview or ""), + ) + trace("request:start") rt_conn = None rows: list[Any] @@ -4122,6 +4134,12 @@ def list_chat_sessions( finally: sconn.close() + trace( + "rows:loaded", + rawCount=len(rows or []), + realtime=bool(source_norm == "realtime"), + ) + filtered: list[Any] = [] for r in rows: username = _session_row_get(r, "username", "") or "" @@ -4133,8 +4151,18 @@ def list_chat_sessions( continue filtered.append(r) + trace( + "rows:filtered", + filteredCount=len(filtered), + ) + raw_usernames = [str(_session_row_get(r, "username", "") or "").strip() for r in filtered] top_flags = _load_contact_top_flags(contact_db_path, raw_usernames) + trace( + "top-flags:loaded", + usernameCount=len(raw_usernames), + topCount=sum(1 for value in top_flags.values() if value), + ) def _to_int(v: Any) -> int: try: @@ -4164,6 +4192,12 @@ def list_chat_sessions( contact_rows = _load_contact_rows(contact_db_path, usernames) local_avatar_usernames = _query_head_image_usernames(head_image_db_path, usernames) + trace( + "contacts:loaded", + usernameCount=len(usernames), + contactRowCount=len(contact_rows), + localAvatarCount=len(local_avatar_usernames), + ) # Some sessions (notably enterprise groups / openim-related IDs) may be missing from decrypted contact.db # (or lack nickname/avatar columns). In that case, fall back to WCDB APIs (same as WeFlow) to resolve @@ -4212,6 +4246,12 @@ def list_chat_sessions( wcdb_display_names = {} wcdb_avatar_urls = {} + trace( + "wcdb-fallback:loaded", + displayNameCount=len(wcdb_display_names), + avatarUrlCount=len(wcdb_avatar_urls), + ) + preview_mode = str(preview or "").strip().lower() if preview_mode not in {"latest", "index", "session", "db", "none"}: preview_mode = "latest" @@ -4299,6 +4339,14 @@ def list_chat_sessions( except Exception: pass + trace( + "previews:resolved", + previewMode=preview_mode, + previewCount=len(last_previews), + groupSenderDisplayCount=len(group_sender_display_names), + unresolvedGroupSenderCount=len(unresolved), + ) + sessions: list[dict[str, Any]] = [] for r in filtered: username = r["username"] @@ -4416,6 +4464,10 @@ def list_chat_sessions( } ) + trace( + "response:ready", + sessionCount=len(sessions), + ) return { "status": "success", "account": account_dir.name, @@ -5169,11 +5221,24 @@ def list_chat_messages( head_image_db_path = account_dir / "head_image.db" message_resource_db_path = account_dir / "message_resource.db" base_url = str(request.base_url).rstrip("/") + _trace_id, trace = create_perf_trace( + logger, + "chat.messages", + account=account_dir.name, + username=username, + source=source_norm or "default", + limit=int(limit), + offset=int(offset), + order=str(order or ""), + renderTypes=str(render_types or ""), + ) + trace("request:start") db_paths: list[Path] = [] if source_norm != "realtime": db_paths = _iter_message_db_paths(account_dir) if not db_paths: + trace("response:error", reason="no-message-dbs") return { "status": "error", "account": account_dir.name, @@ -5199,6 +5264,12 @@ def list_chat_messages( resource_conn = None resource_chat_id = None + trace( + "resource-db:resolved", + hasResourceDb=bool(resource_conn is not None), + resourceChatId=int(resource_chat_id or 0), + ) + want_asc = str(order or "").lower() != "desc" want_types: Optional[set[str]] = None @@ -5337,6 +5408,16 @@ def list_chat_messages( break scan_take = next_take + trace( + "messages:collected", + scanTake=int(scan_take), + mergedCount=len(merged), + hasMoreAny=bool(has_more_any), + senderUsernameCount=len(sender_usernames), + quoteUsernameCount=len(quote_usernames), + patUsernameCount=len(pat_usernames), + ) + # Self-heal (default source only): if the decrypted snapshot has no conversation table yet (new session), # do a one-shot realtime->decrypted sync and re-query once. This avoids "暂无聊天记录" after turning off realtime. if ( @@ -5352,6 +5433,7 @@ def list_chat_messages( missing_table = True if missing_table: + trace("self-heal:missing-table") rt_conn2 = None try: rt_conn2 = WCDB_REALTIME.ensure_connected(account_dir) @@ -5362,6 +5444,7 @@ def list_chat_messages( if rt_conn2 is not None: try: + trace("self-heal:sync:start") with _realtime_sync_lock(account_dir.name, username): msg_db_path2, table_name2 = _ensure_decrypted_message_table(account_dir, username) _sync_chat_realtime_messages_for_table( @@ -5373,7 +5456,9 @@ def list_chat_messages( max_scan=max(200, int(limit) + 50), backfill_limit=0, ) + trace("self-heal:sync:end") except Exception: + trace("self-heal:sync:error") pass ( @@ -5393,6 +5478,11 @@ def list_chat_messages( ) if want_types is not None: merged = [m for m in merged if _normalize_render_type_key(m.get("renderType")) in want_types] + trace( + "self-heal:requery:end", + mergedCount=len(merged), + hasMoreAny=bool(has_more_any), + ) r""" take = int(limit) + int(offset) @@ -5886,8 +5976,17 @@ def list_chat_messages( if want_asc: page = list(reversed(page)) + trace( + "page:sliced", + mergedCount=len(merged), + pageCount=len(page), + hasMore=bool(has_more_global), + orderAsc=bool(want_asc), + ) + # Hot path optimization: only enrich the page we return. if not page: + trace("response:ready", pageCount=0) return { "status": "success", "account": account_dir.name, @@ -5961,6 +6060,12 @@ def list_chat_messages( ) sender_contact_rows = _load_contact_rows(contact_db_path, uniq_senders) local_sender_avatars = _query_head_image_usernames(head_image_db_path, uniq_senders) + trace( + "senders:loaded", + uniqSenderCount=len(uniq_senders), + senderContactRowCount=len(sender_contact_rows), + localSenderAvatarCount=len(local_sender_avatars), + ) # contact.db may not include enterprise/openim contacts (or group chatroom records). WCDB has a more complete # view of display names + avatar URLs, so we use it as a best-effort fallback. @@ -5997,6 +6102,12 @@ def list_chat_messages( chatroom_id=username, sender_usernames=uniq_senders, ) + trace( + "sender-fallbacks:loaded", + wcdbDisplayNameCount=len(wcdb_display_names), + wcdbAvatarUrlCount=len(wcdb_avatar_urls), + groupNicknameCount=len(group_nicknames), + ) for m in messages_window: # If appmsg doesn't provide sourcedisplayname, try mapping sourceusername to display name. @@ -6155,6 +6266,12 @@ def list_chat_messages( wcdb_display_names=wcdb_display_names, ) + trace( + "response:ready", + pageCount=len(page), + total=int(offset) + len(page) + (1 if has_more_global else 0), + hasMore=bool(has_more_global), + ) return { "status": "success", "account": account_dir.name, diff --git a/src/wechat_decrypt_tool/routers/chat_media.py b/src/wechat_decrypt_tool/routers/chat_media.py index 1b6656b..ad68295 100644 --- a/src/wechat_decrypt_tool/routers/chat_media.py +++ b/src/wechat_decrypt_tool/routers/chat_media.py @@ -60,6 +60,7 @@ from ..media_helpers import ( ) from ..chat_helpers import _extract_md5_from_packed_info, _load_contact_rows, _pick_avatar_url from ..path_fix import PathFixRoute +from ..perf_trace import create_perf_trace from ..wcdb_realtime import WCDB_REALTIME, get_avatar_urls as _wcdb_get_avatar_urls logger = get_logger(__name__) @@ -424,6 +425,13 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): account_dir = _resolve_account_dir(account) account_name = str(account_dir.name or "").strip() user_key = str(username or "").strip() + _trace_id, trace = create_perf_trace( + logger, + "chat.avatar", + account=account_name, + username=user_key, + ) + trace("request:start") # 1) Try on-disk cache first (fast path) user_entry = None @@ -436,17 +444,25 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): logger.info(f"[avatar_cache_hit] kind=user account={account_name} username={user_key}") except Exception as e: logger.warning(f"[avatar_cache_error] read user cache failed account={account_name} username={user_key} err={e}") + trace( + "user-cache:checked", + cacheEnabled=bool(is_avatar_cache_enabled()), + hasEntry=bool(user_entry), + hasFile=bool(cached_file is not None), + ) head_image_db_path = account_dir / "head_image.db" if not head_image_db_path.exists(): # No local head_image.db: allow fallback from cached/remote URL path. if cached_file is not None and user_entry: headers = build_avatar_cache_response_headers(user_entry) + trace("response:ready", result="user-cache-hit-no-head-image", mediaType=str(user_entry.get("media_type") or "")) return FileResponse( str(cached_file), media_type=str(user_entry.get("media_type") or "application/octet-stream"), headers=headers, ) + trace("response:error", result="head-image-db-missing") raise HTTPException(status_code=404, detail="head_image.db not found.") conn = sqlite3.connect(str(head_image_db_path)) @@ -455,6 +471,7 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): "SELECT md5, update_time FROM head_image WHERE username = ? ORDER BY update_time DESC LIMIT 1", (username,), ).fetchone() + trace("head-image:meta", hasMeta=bool(meta and meta[0] is not None)) if meta and meta[0] is not None: db_md5 = str(meta[0] or "").strip().lower() try: @@ -472,6 +489,11 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): if cached_md5 == db_md5 and cached_update == db_update_time: touch_avatar_cache_entry(account_name, str(user_entry.get("cache_key") or "")) headers = build_avatar_cache_response_headers(user_entry) + trace( + "response:ready", + result="user-cache-hit-head-image-matched", + mediaType=str(user_entry.get("media_type") or ""), + ) return FileResponse( str(cached_file), media_type=str(user_entry.get("media_type") or "application/octet-stream"), @@ -487,6 +509,7 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): data = bytes(row[0]) if isinstance(row[0], (memoryview, bytearray)) else row[0] if not isinstance(data, (bytes, bytearray)): data = bytes(data) + trace("head-image:blob", bytes=len(data or b"")) if data: media_type = _detect_image_media_type(data) media_type = media_type if media_type.startswith("image/") else "application/octet-stream" @@ -505,12 +528,14 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): f"[avatar_cache_download] kind=user account={account_name} username={user_key} src=head_image" ) headers = build_avatar_cache_response_headers(entry) + trace("response:ready", result="head-image-blob-cache-write", mediaType=media_type, bytes=len(data)) return FileResponse(str(out_path), media_type=media_type, headers=headers) # cache write failed: fallback to response bytes logger.warning( f"[avatar_cache_error] kind=user account={account_name} username={user_key} action=write_fallback" ) + trace("response:ready", result="head-image-blob-direct", mediaType=media_type, bytes=len(data)) return Response(content=bytes(data), media_type=media_type) # meta not found (no local avatar blob) @@ -520,9 +545,16 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): # 2) Fallback: remote avatar URL (contact/WCDB), cache by URL. remote_url = _resolve_avatar_remote_url(account_dir=account_dir, username=user_key) + trace("remote-url:resolved", hasRemoteUrl=bool(remote_url)) if remote_url and is_avatar_cache_enabled(): url_entry = get_avatar_cache_url_entry(account_name, remote_url) url_file = avatar_cache_entry_file_exists(account_name, url_entry) + trace( + "url-cache:checked", + hasEntry=bool(url_entry), + hasFile=bool(url_file), + isFresh=bool(avatar_cache_entry_is_fresh(url_entry) if url_entry else False), + ) if url_entry and url_file and avatar_cache_entry_is_fresh(url_entry): logger.info(f"[avatar_cache_hit] kind=url account={account_name} username={user_key}") touch_avatar_cache_entry(account_name, str(url_entry.get("cache_key") or "")) @@ -548,6 +580,7 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): except Exception: pass headers = build_avatar_cache_response_headers(url_entry) + trace("response:ready", result="url-cache-hit", mediaType=str(url_entry.get("media_type") or "")) return FileResponse( str(url_file), media_type=str(url_entry.get("media_type") or "application/octet-stream"), @@ -624,21 +657,31 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): etag0 = str((url_entry or {}).get("etag") or "").strip() lm0 = str((url_entry or {}).get("last_modified") or "").strip() try: + trace("remote-download:start", hasEtag=bool(etag0), hasLastModified=bool(lm0)) payload, ct, etag_new, lm_new, not_modified = await asyncio.to_thread( _download_remote_avatar, remote_url, etag=etag0, last_modified=lm0, ) + trace( + "remote-download:end", + bytes=len(payload or b""), + contentType=str(ct or ""), + notModified=bool(not_modified), + ) except Exception as e: logger.warning(f"[avatar_cache_error] kind=url account={account_name} username={user_key} err={e}") + trace("remote-download:error", error=str(e)) if url_entry and url_file: headers = build_avatar_cache_response_headers(url_entry) + trace("response:ready", result="stale-url-cache-after-download-error") return FileResponse( str(url_file), media_type=str(url_entry.get("media_type") or "application/octet-stream"), headers=headers, ) + trace("response:error", result="remote-download-failed") raise HTTPException(status_code=404, detail="Avatar not found.") if not_modified and url_entry and url_file: @@ -663,6 +706,7 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): pass logger.info(f"[avatar_cache_revalidate] kind=url account={account_name} username={user_key} status=304") headers = build_avatar_cache_response_headers(url_entry) + trace("response:ready", result="remote-not-modified", mediaType=str(url_entry.get("media_type") or "")) return FileResponse( str(url_file), media_type=str(url_entry.get("media_type") or "application/octet-stream"), @@ -714,16 +758,19 @@ async def get_chat_avatar(username: str, account: Optional[str] = None): pass logger.info(f"[avatar_cache_download] kind=url account={account_name} username={user_key}") headers = build_avatar_cache_response_headers(entry) + trace("response:ready", result="remote-download-cache-write", mediaType=media_type, bytes=len(payload2)) return FileResponse(str(out_path), media_type=media_type, headers=headers) if cached_file is not None and user_entry: headers = build_avatar_cache_response_headers(user_entry) + trace("response:ready", result="stale-user-cache-fallback", mediaType=str(user_entry.get("media_type") or "")) return FileResponse( str(cached_file), media_type=str(user_entry.get("media_type") or "application/octet-stream"), headers=headers, ) + trace("response:error", result="not-found") raise HTTPException(status_code=404, detail="Avatar not found.") @@ -1387,6 +1434,7 @@ async def get_chat_image( account: Optional[str] = None, username: Optional[str] = None, deep_scan: bool = False, + prefer_live: bool = False, ): if (not md5) and (not file_id) and (not server_id): raise HTTPException(status_code=400, detail="Missing md5/file_id/server_id.") @@ -1396,6 +1444,18 @@ async def get_chat_image( file_id = str(md5) md5 = None account_dir = _resolve_account_dir(account) + _trace_id, trace = create_perf_trace( + logger, + "chat.image", + account=account_dir.name, + username=str(username or ""), + md5=str(md5 or ""), + fileId=str(file_id or ""), + serverId=int(server_id or 0), + deepScan=bool(deep_scan), + preferLive=bool(prefer_live), + ) + trace("request:start") # Prefer resource md5 derived from message_resource.db for chat history / app messages. # This matches how regular image messages are resolved elsewhere in the codebase. @@ -1409,6 +1469,11 @@ async def get_chat_image( ) if md5_from_msg: md5 = md5_from_msg + trace( + "server-id:resolved", + resourceMd5Found=bool(resource_md5), + finalMd5=str(md5 or ""), + ) cached_path: Optional[Path] = None cached_data = b"" @@ -1430,12 +1495,33 @@ async def get_chat_image( decrypted_path.unlink() except Exception: pass + trace( + "decrypted-cache:checked", + hasCachedPath=bool(cached_path), + cachedBytes=len(cached_data or b""), + cachedMediaType=cached_media_type, + ) + + if cached_path and (not prefer_live): + trace( + "response:ready", + result="decrypted-cache-hit", + mediaType=cached_media_type, + bytes=len(cached_data or b""), + ) + return _build_cached_media_response(request, cached_data, cached_media_type) # 回退:从微信数据目录实时定位并解密 wxid_dir = _resolve_account_wxid_dir(account_dir) hardlink_db_path = account_dir / "hardlink.db" db_storage_dir = _resolve_account_db_storage_dir(account_dir) hardlink_has_image_table = _hardlink_has_table_prefix(str(hardlink_db_path), "image_hardlink_info") + trace( + "roots:resolved", + hasWxidDir=bool(wxid_dir), + hasDbStorageDir=bool(db_storage_dir), + hardlinkHasImageTable=bool(hardlink_has_image_table), + ) roots: list[Path] = [] if wxid_dir: @@ -1455,9 +1541,11 @@ async def get_chat_image( p: Optional[Path] = None candidates: list[Path] = [] + allow_deep_scan = False if md5: - p = _resolve_media_path_from_hardlink( + p = await asyncio.to_thread( + _resolve_media_path_from_hardlink, hardlink_db_path, roots[0], md5=str(md5), @@ -1471,7 +1559,8 @@ async def get_chat_image( for r in [wxid_dir, db_storage_dir]: if not r: continue - hit = _fallback_search_media_by_file_id( + hit = await asyncio.to_thread( + _fallback_search_media_by_file_id, str(r), str(file_id), kind="image", @@ -1483,7 +1572,8 @@ async def get_chat_image( # Fast fallback for thumbnails not indexed by hardlink.db: scan only this chat's attach directory. if (not p) and wxid_dir and username: - hit = _fast_probe_image_path_in_chat_attach( + hit = await asyncio.to_thread( + _fast_probe_image_path_in_chat_attach, wxid_dir_str=str(wxid_dir), username=str(username), md5=str(md5), @@ -1496,11 +1586,11 @@ async def get_chat_image( # - hardlink.db doesn't have the image table (older/partial data). allow_deep_scan = bool(deep_scan) or (not hardlink_has_image_table) if (not p) and wxid_dir and allow_deep_scan: - hit = _fallback_search_media_by_md5(str(wxid_dir), str(md5), kind="image") + hit = await asyncio.to_thread(_fallback_search_media_by_md5, str(wxid_dir), str(md5), kind="image") if hit: p = Path(hit) try: - candidates.extend(_iter_media_source_candidates(Path(hit))) + candidates.extend(await asyncio.to_thread(_iter_media_source_candidates, Path(hit))) except Exception: pass elif file_id: @@ -1508,7 +1598,8 @@ async def get_chat_image( for r in [wxid_dir, db_storage_dir]: if not r: continue - hit = _fallback_search_media_by_file_id( + hit = await asyncio.to_thread( + _fallback_search_media_by_file_id, str(r), str(file_id), kind="image", @@ -1520,11 +1611,25 @@ async def get_chat_image( if not p: if cached_path: + trace("response:ready", result="decrypted-cache-fallback", mediaType=cached_media_type, bytes=len(cached_data or b"")) return _build_cached_media_response(request, cached_data, cached_media_type) + trace( + "response:error", + result="source-not-found", + allowDeepScan=bool(allow_deep_scan), + candidateCount=len(candidates), + ) raise HTTPException(status_code=404, detail="Image not found.") - candidates.extend(_iter_media_source_candidates(p)) - candidates = _order_media_candidates(candidates) + candidates.extend(await asyncio.to_thread(_iter_media_source_candidates, p)) + candidates = await asyncio.to_thread(_order_media_candidates, candidates) + trace( + "candidates:resolved", + sourcePath=str(p), + candidateCount=len(candidates), + hasCachedPath=bool(cached_path), + allowDeepScan=bool(allow_deep_scan), + ) if cached_path: try: @@ -1554,9 +1659,17 @@ async def get_chat_image( data = b"" media_type = "application/octet-stream" chosen: Optional[Path] = None + decode_attempts = 0 + trace("decode:start", candidateCount=len(candidates)) for src_path in candidates: + decode_attempts += 1 try: - data, media_type = _read_and_maybe_decrypt_media(src_path, account_dir=account_dir, weixin_root=wxid_dir) + data, media_type = await asyncio.to_thread( + _read_and_maybe_decrypt_media, + src_path, + account_dir=account_dir, + weixin_root=wxid_dir, + ) except Exception: continue @@ -1568,18 +1681,32 @@ async def get_chat_image( break if not chosen: + trace("response:error", result="decode-failed", decodeAttempts=decode_attempts) raise HTTPException(status_code=422, detail="Image found but failed to decode/decrypt.") + trace( + "decode:chosen", + decodeAttempts=decode_attempts, + chosen=str(chosen), + mediaType=media_type, + bytes=len(data or b""), + ) + # 仅在 md5 有效时缓存到 resource 目录;file_id 可能非常长,避免写入超长文件名 if md5 and media_type.startswith("image/"): try: - _write_cached_chat_image(account_dir, str(md5), data) + await asyncio.to_thread(_write_cached_chat_image, account_dir, str(md5), data) + trace("decrypted-cache:write", skipped=False) except Exception: + trace("decrypted-cache:write", skipped=False, error=True) pass + else: + trace("decrypted-cache:write", skipped=True) logger.info( f"chat_image: md5={md5} file_id={file_id} chosen={chosen} media_type={media_type} bytes={len(data)}" ) + trace("response:ready", result="decoded", mediaType=media_type, bytes=len(data or b"")) return _build_cached_media_response(request, data, media_type)