From 7f5f0816ec71f3c0e457fdce878ad28722094e2f Mon Sep 17 00:00:00 2001 From: daidai Date: Sun, 5 Apr 2026 00:08:38 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=20thinking/reasoning?= =?UTF-8?q?=20=E6=8F=90=E5=8F=96=E4=B8=8E=E4=BC=A0=E9=80=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - respond.js: buildChatCompletion 和 buildChatCompletionChunk 新增 reasoningContent 参数,输出 OpenAI o1 兼容的 reasoning_content 字段 - openai/routes.js: 解析请求体中的 reasoning 布尔标志,prompt 预览扩展到 100 字 - queue.js: 将 reasoning 标志传递给 generate - lmarena_text.js: 解析 SSE 中的 ag: 行提取思考过程 - deepseek_text.js: 识别 THINK fragment 类型,收集 thinking 内容 - gemini_text.js: 从 protobuf 响应的 rc[37] 路径提取 thinking 内容 --- src/backend/adapter/deepseek_text.js | 46 ++++++++++++++-- src/backend/adapter/gemini.js | 62 ++++++++++++++++++++- src/backend/adapter/gemini_text.js | 81 ++++++++++++++++++++++++---- src/backend/adapter/lmarena_text.js | 30 ++++++++--- src/server/api/openai/routes.js | 6 ++- src/server/queue.js | 4 +- src/server/respond.js | 28 +++++++--- src/utils/logger.js | 22 ++++++-- 8 files changed, 243 insertions(+), 36 deletions(-) diff --git a/src/backend/adapter/deepseek_text.js b/src/backend/adapter/deepseek_text.js index 2fddc49..b618703 100644 --- a/src/backend/adapter/deepseek_text.js +++ b/src/backend/adapter/deepseek_text.js @@ -79,7 +79,7 @@ async function configureModel(page, modelConfig, meta = {}) { * @param {string[]} imgPaths - 图片路径数组 (此适配器不支持) * @param {string} [modelId] - 模型 ID * @param {object} [meta={}] - 日志元数据 - * @returns {Promise<{text?: string, error?: string}>} + * @returns {Promise<{text?: string, reasoning?: string, error?: string}>} */ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const { page, config } = context; @@ -108,8 +108,10 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { logger.debug('适配器', '启动 API 监听...', meta); let textContent = ''; + let thinkingContent = ''; // thinking 内容 let isComplete = false; let isCollecting = false; // 当前最后一个 fragment 是否为 RESPONSE 类型 + let isCollectingThinking = false; // 是否正在收集 thinking const responsePromise = page.waitForResponse(async (response) => { const url = response.url(); @@ -133,14 +135,21 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // --- 处理 fragment 列表变更,更新 isCollecting 状态 --- - // 初始响应中可能已有 fragments (如 SEARCH / RESPONSE) + // 初始响应中可能已有 fragments (如 THINK / SEARCH / RESPONSE) if (data.v?.response?.fragments && Array.isArray(data.v.response.fragments)) { for (const fragment of data.v.response.fragments) { if (fragment.type === 'RESPONSE') { isCollecting = true; + isCollectingThinking = false; if (fragment.content) textContent += fragment.content; + } else if (fragment.type === 'THINK') { + // DeepSeek 使用 THINK (不是 THINKING) + isCollectingThinking = true; + isCollecting = false; + if (fragment.content) thinkingContent += fragment.content; } else { isCollecting = false; + isCollectingThinking = false; } } } @@ -150,9 +159,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { for (const fragment of data.v) { if (fragment.type === 'RESPONSE') { isCollecting = true; + isCollectingThinking = false; if (fragment.content) textContent += fragment.content; + } else if (fragment.type === 'THINK') { + isCollectingThinking = true; + isCollecting = false; + if (fragment.content) thinkingContent += fragment.content; } else { isCollecting = false; + isCollectingThinking = false; } } } @@ -164,9 +179,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { for (const fragment of item.v) { if (fragment.type === 'RESPONSE') { isCollecting = true; + isCollectingThinking = false; if (fragment.content) textContent += fragment.content; + } else if (fragment.type === 'THINK') { + isCollectingThinking = true; + isCollecting = false; + if (fragment.content) thinkingContent += fragment.content; } else { isCollecting = false; + isCollectingThinking = false; } } } @@ -182,8 +203,12 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 带路径的 content 操作 (如 response/fragments/-1/content) if (data.p && typeof data.v === 'string') { const match = data.p.match(/response\/fragments\/(-?\d+)\/content/); - if (match && isCollecting) { - textContent += data.v; + if (match) { + if (isCollecting) { + textContent += data.v; + } else if (isCollectingThinking) { + thinkingContent += data.v; + } } } @@ -191,6 +216,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { if (data.v && typeof data.v === 'string' && !data.p && !data.o) { if (isCollecting) { textContent += data.v; + } else if (isCollectingThinking) { + thinkingContent += data.v; } } @@ -233,7 +260,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { logger.info('适配器', `已获取文本内容 (${textContent.length} 字符)`, meta); logger.info('适配器', '文本生成完成,任务完成', meta); - return { text: textContent.trim() }; + + const trimmedThinking = thinkingContent.trim(); + const result = { text: textContent.trim() }; + + // 返回结果(如果有 thinking 则包含 reasoning) + if (trimmedThinking) { + logger.info('适配器', `已获取思考过程 (${trimmedThinking.length} 字符)`, meta); + result.reasoning = trimmedThinking; + } + return result; } catch (err) { // 顶层错误处理 diff --git a/src/backend/adapter/gemini.js b/src/backend/adapter/gemini.js index 5a0584c..4affb44 100644 --- a/src/backend/adapter/gemini.js +++ b/src/backend/adapter/gemini.js @@ -179,6 +179,12 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const imageUrl = imageUrls[0] + '=s1024-rj'; logger.info('适配器', `找到 ${imageUrls.length} 张图片,开始下载...`, meta); + // 提取图片生成的详细描述(thinking) + const thinking = extractImageThinking(bodyBuffer); + if (thinking) { + logger.info('适配器', `提取到详细描述,长度: ${thinking.length}`, meta); + } + // 使用封装的下载函数 const imgDlCfg = config?.backend?.pool?.failover || {}; const result = await useContextDownload(imageUrl, page, { @@ -190,7 +196,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } logger.info('适配器', '已获取图片,任务完成', meta); - return result; + // 返回图片和 thinking(如果有) + return thinking ? { ...result, reasoning: thinking } : result; } } catch (err) { @@ -428,4 +435,57 @@ function extractAiTextFromResponse(bodyBuffer) { } } return best; +} + +/** + * 深度遍历,查找长文本描述(图片生成的 thinking/详细描述) + * 排除 URL、base64、分类器名称等非描述性长字符串 + * @param {any} root - 要遍历的对象 + * @returns {string} 最长的描述文本,未找到则返回空字符串 + */ +function findLongDescriptionDeep(root) { + const candidates = []; + const stack = [root]; + + while (stack.length) { + const cur = stack.pop(); + if (!cur) continue; + + if (typeof cur === 'string') { + if (cur.length > 200 && + !cur.startsWith('http') && + !cur.startsWith('data:') && + !cur.includes('googleapis.com') && + !cur.includes('googleusercontent.com') && + !/^[A-Za-z0-9+/=]{100,}$/.test(cur)) { + candidates.push(cur); + } + } else if (Array.isArray(cur)) { + for (const v of cur) stack.push(v); + } else if (typeof cur === 'object') { + for (const v of Object.values(cur)) stack.push(v); + } + } + + if (candidates.length === 0) return ''; + return candidates.reduce((a, b) => a.length >= b.length ? a : b, ''); +} + +/** + * 从响应体 Buffer 中提取图片生成的详细描述(thinking) + * @param {Buffer} bodyBuffer - 响应体 Buffer + * @returns {string} 详细描述文本,未找到则返回空字符串 + */ +function extractImageThinking(bodyBuffer) { + const frames = parseLenFramedResponse(bodyBuffer); + const payloads = extractPayloads(frames); + + let best = ''; + for (const payload of payloads) { + const text = findLongDescriptionDeep(payload); + if (text.length > best.length) { + best = text; + } + } + return best; } \ No newline at end of file diff --git a/src/backend/adapter/gemini_text.js b/src/backend/adapter/gemini_text.js index 2ba652e..65acab0 100644 --- a/src/backend/adapter/gemini_text.js +++ b/src/backend/adapter/gemini_text.js @@ -27,7 +27,7 @@ const TARGET_URL = 'https://gemini.google.com/app?hl=en'; * @param {string[]} imgPaths - 图片路径数组 * @param {string} [modelId] - 模型 ID (此适配器未使用) * @param {object} [meta={}] - 日志元数据 - * @returns {Promise<{text?: string, error?: string}>} + * @returns {Promise<{text?: string, reasoning?: string, error?: string}>} */ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const { page, config } = context; @@ -172,11 +172,11 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const bodyBuffer = await apiResponse.body(); logger.debug('适配器', `收到响应体,字节数: ${bodyBuffer.length}`, meta); - const text = getFinalAiTextFromResponse(bodyBuffer); + const { text, reasoning } = getFinalAiTextFromResponse(bodyBuffer); if (text) { - logger.info('适配器', `解析成功,文本长度: ${text.length}`, meta); - return { text }; + logger.info('适配器', `解析成功,文本长度: ${text.length},思考长度: ${reasoning?.length || 0}`, meta); + return reasoning ? { text, reasoning } : { text }; } else { return { error: '未能从响应中提取文本' }; } @@ -390,19 +390,80 @@ function collectRcTextsDeep(root) { } /** - * 从响应体 Buffer 中提取最终 AI 文本 + * 从单个 payload 中提取文本和 thinking 内容 + * @param {Array} payload - 解析后的 payload + * @returns {{text: string, thinking: string}} + */ +function extractTextAndThinking(payload) { + let text = ''; + let thinking = ''; + + try { + if (!Array.isArray(payload)) return { text, thinking }; + + // 找 rc 节点 (通常在 payload[4][0]) + // 结构: payload[4][0] = ["rc_xxx", ["text..."], ..., [37]: [[thinking]]] + let rc = null; + if (payload[4] && Array.isArray(payload[4][0]) && + typeof payload[4][0][0] === 'string' && payload[4][0][0].startsWith('rc_')) { + rc = payload[4][0]; + } + + if (!rc) return { text, thinking }; + + // 文本在 rc[1][0] + if (Array.isArray(rc[1]) && typeof rc[1][0] === 'string') { + text = rc[1][0]; + } + + // thinking 在 rc[37][0][0] + // 结构: rc[37] = [["**Thinking Title**\n\nThinking content..."]] + try { + if (rc[37] && Array.isArray(rc[37]) && rc[37][0] && Array.isArray(rc[37][0])) { + if (typeof rc[37][0][0] === 'string') { + thinking = rc[37][0][0]; + } + } + } catch { + // thinking 提取失败,忽略 + } + } catch { + // ignore + } + + return { text, thinking }; +} + +/** + * 从响应体 Buffer 中提取最终 AI 文本和 thinking * @param {Buffer} bodyBuffer - 响应体 Buffer + * @returns {{text: string, reasoning: string}} */ function getFinalAiTextFromResponse(bodyBuffer) { const frames = parseLenFramedResponse(bodyBuffer); const payloads = extractPayloads(frames); - let best = ""; + let bestText = ''; + let bestThinking = ''; + + // 遍历所有 payload,保留最长的 text 和对应的 thinking for (const payload of payloads) { - const m = collectRcTextsDeep(payload); - for (const text of m.values()) { - if (text.length > best.length) best = text; + const { text, thinking } = extractTextAndThinking(payload); + if (text.length > bestText.length) { + bestText = text; + bestThinking = thinking; } } - return best; + + // 如果 extractTextAndThinking 没找到(可能结构不匹配),fallback 到 collectRcTextsDeep + if (!bestText) { + for (const payload of payloads) { + const m = collectRcTextsDeep(payload); + for (const text of m.values()) { + if (text.length > bestText.length) bestText = text; + } + } + } + + return { text: bestText, reasoning: bestThinking }; } \ No newline at end of file diff --git a/src/backend/adapter/lmarena_text.js b/src/backend/adapter/lmarena_text.js index f580d09..e2043fe 100644 --- a/src/backend/adapter/lmarena_text.js +++ b/src/backend/adapter/lmarena_text.js @@ -134,30 +134,48 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } // 9. 解析文本流 - // 格式示例: + // SSE 格式说明: + // - a0: 回复内容(最终文本) + // - ag: 思考过程 (thinking/reasoning) - 仅 thinking 模型有 + // - a2: 心跳 [{"type":"heartbeat"}] + // - ad: 结束标记 {"finishReason":"stop"} + // 示例: + // ag:"Let me think..." // a0:"Hello" // a0:" World" - // d:{"finishReason":"stop"} + // ad:{"finishReason":"stop"} let fullText = ''; + let thinkingText = ''; const lines = content.split('\n'); for (const line of lines) { if (line.startsWith('a0:')) { try { - // 尝试解析 JSON 字符串内容 - // line.substring(3) 应该是 JSON 字符串,如 "Hello" const textPart = JSON.parse(line.substring(3)); fullText += textPart; } catch (e) { - // 如果解析失败,可能是原生文本或其他格式 logger.warn('适配器', `解析文本块失败: ${line}`, meta); } + } else if (line.startsWith('ag:')) { + // 思考过程内容 + try { + const thinkPart = JSON.parse(line.substring(3)); + thinkingText += thinkPart; + } catch (e) { + logger.warn('适配器', `解析思考块失败: ${line}`, meta); + } } } if (fullText) { logger.info('适配器', `获取文本成功,长度: ${fullText.length}`, meta); - return { text: fullText }; + const result = { text: fullText }; + // 如果有思考过程,添加到 reasoning 字段 + if (thinkingText.trim()) { + logger.info('适配器', `获取思考过程,长度: ${thinkingText.length}`, meta); + result.reasoning = thinkingText; + } + return result; } else { logger.warn('适配器', '未解析到有效文本内容', { ...meta, preview: content.substring(0, 150) }); // 如果没解析到 a0,尝试直接返回原始内容防空 diff --git a/src/server/api/openai/routes.js b/src/server/api/openai/routes.js index ff71b58..bfba48d 100644 --- a/src/server/api/openai/routes.js +++ b/src/server/api/openai/routes.js @@ -123,8 +123,9 @@ export function createOpenAIRouter(context) { } const { prompt, imagePaths, modelId, modelName } = parseResult.data; + const reasoning = data.reasoning === true; - logger.info('服务器', `[队列] 请求入队: ${prompt.slice(0, 10)}...`, { id: requestId, images: imagePaths.length }); + logger.info('服务器', `[队列] 请求入队: ${prompt.slice(0, 100)}...`, { id: requestId, images: imagePaths.length }); // 加入队列 queueManager.addTask({ @@ -135,7 +136,8 @@ export function createOpenAIRouter(context) { modelId, modelName, id: requestId, - isStreaming + isStreaming, + reasoning }); } catch (err) { diff --git a/src/server/queue.js b/src/server/queue.js index 1e2a20e..fc44f98 100644 --- a/src/server/queue.js +++ b/src/server/queue.js @@ -94,7 +94,7 @@ export function createQueueManager(queueConfig, callbacks) { * @param {TaskContext} task - 任务上下文 */ async function processTask(task) { - const { res, prompt, imagePaths, modelId, modelName, id, isStreaming } = task; + const { res, prompt, imagePaths, modelId, modelName, id, isStreaming, reasoning } = task; const startTime = Date.now(); logger.info('服务器', '[队列] 开始处理任务', { id, remaining: queue.length }); @@ -133,7 +133,7 @@ export function createQueueManager(queueConfig, callbacks) { } // 调用核心生图逻辑 (通过 Pool 分发) - const result = await generate(poolContext, prompt, imagePaths, modelId, { id }); + const result = await generate(poolContext, prompt, imagePaths, modelId, { id, reasoning }); // 清除心跳 if (heartbeatInterval) clearInterval(heartbeatInterval); diff --git a/src/server/respond.js b/src/server/respond.js index a5ababb..618d40f 100644 --- a/src/server/respond.js +++ b/src/server/respond.js @@ -106,9 +106,19 @@ export function sendApiError(res, options) { * 构造 OpenAI 格式的聊天完成响应(非流式) * @param {string} content - 响应内容 * @param {string} [modelName] - 模型名称 + * @param {string} [reasoningContent] - 思考/推理过程内容 (OpenAI o1 格式) * @returns {object} OpenAI 格式的响应对象 */ -export function buildChatCompletion(content, modelName) { +export function buildChatCompletion(content, modelName, reasoningContent) { + const message = { + role: 'assistant', + content: content + }; + + if (reasoningContent) { + message.reasoning_content = reasoningContent; + } + return { id: 'chatcmpl-' + Date.now(), object: 'chat.completion', @@ -116,10 +126,7 @@ export function buildChatCompletion(content, modelName) { model: modelName || 'default-model', choices: [{ index: 0, - message: { - role: 'assistant', - content: content - }, + message, finish_reason: 'stop' }] }; @@ -130,9 +137,16 @@ export function buildChatCompletion(content, modelName) { * @param {string} content - 响应内容 * @param {string} [modelName] - 模型名称 * @param {string|null} [finishReason='stop'] - 完成原因 + * @param {string} [reasoningContent] - 思考/推理过程内容 (OpenAI o1 格式) * @returns {object} OpenAI 格式的流式响应块 */ -export function buildChatCompletionChunk(content, modelName, finishReason = 'stop') { +export function buildChatCompletionChunk(content, modelName, finishReason = 'stop', reasoningContent) { + const delta = { content }; + + if (reasoningContent) { + delta.reasoning_content = reasoningContent; + } + return { id: 'chatcmpl-' + Date.now(), object: 'chat.completion.chunk', @@ -140,7 +154,7 @@ export function buildChatCompletionChunk(content, modelName, finishReason = 'sto model: modelName || 'default-model', choices: [{ index: 0, - delta: { content }, + delta, finish_reason: finishReason }] }; diff --git a/src/utils/logger.js b/src/utils/logger.js index 92194b4..001c639 100644 --- a/src/utils/logger.js +++ b/src/utils/logger.js @@ -112,6 +112,9 @@ function shouldLog(level) { return targetIndex >= effectiveEnvIndex; } +// 需要提取到前面用方括号显示的 meta 字段 +const FRONT_META_KEYS = ['id', 'adapter', 'model']; + export function log(level, mod, msg, meta = {}) { if (!shouldLog(level)) return; @@ -121,10 +124,23 @@ export function log(level, mod, msg, meta = {}) { // 将消息中的换行符替换为 ↵ 符号,保持日志为单行 const sanitizedMsg = msg.replace(/\r?\n/g, ' ↵ '); - const base = `${ts} [${levelTag}] [${mod}] ${sanitizedMsg}`; - const metaStr = Object.keys(meta).length - ? ' | ' + Object.entries(meta).map(([k, v]) => { + // 提取关键字段放在前面用方括号显示 + const frontParts = []; + const remainingMeta = {}; + for (const [k, v] of Object.entries(meta)) { + if (FRONT_META_KEYS.includes(k) && v !== undefined && v !== null) { + frontParts.push(`[${v}]`); + } else { + remainingMeta[k] = v; + } + } + const frontStr = frontParts.length ? ' ' + frontParts.join(' ') : ''; + + const base = `${ts} [${levelTag}] [${mod}]${frontStr} ${sanitizedMsg}`; + + const metaStr = Object.keys(remainingMeta).length + ? ' | ' + Object.entries(remainingMeta).map(([k, v]) => { if (v instanceof Error) { return `${k}=${v.message}`; }