diff --git a/CHANGELOG.md b/CHANGELOG.md index 016c043..fc2ceed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.4.6] - 2026-01-11 + +### 🔄 Changed +- **优化速度** + - 删除或减少过于保守的控件等待时间 + ## [3.4.5] - 2026-01-11 ### ✨ Added diff --git a/src/backend/adapter/chatgpt.js b/src/backend/adapter/chatgpt.js index 7be2f63..c1b874a 100644 --- a/src/backend/adapter/chatgpt.js +++ b/src/backend/adapter/chatgpt.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -41,14 +41,14 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { + const expectedUploads = imgPaths.length; let uploadedCount = 0; let processedCount = 0; - + logger.info('适配器', `开始上传 ${expectedUploads} 张图片...`, meta); logger.debug('适配器', '点击添加文件按钮...', meta); const addFilesBtn = page.getByRole('button', { name: 'Add files and more' }); @@ -75,17 +75,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { return false; } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); - // 4. 点击发送 - logger.debug('适配器', '点击发送...', meta); + // 4. 发送提示词 + logger.debug('适配器', '发送提示词...', meta); await safeClick(page, sendBtnLocator, { bias: 'button' }); logger.info('适配器', '等待生成结果...', meta); @@ -96,7 +95,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { conversationResponse = await waitApiResponse(page, { urlMatch: 'backend-api/f/conversation', method: 'POST', - timeout: 180000, // 图片生成可能较慢 + timeout: 120000, // 图片生成可能较慢 meta }); } catch (e) { @@ -131,15 +130,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 检查是否生成完成: // 1. 必须有 file_name - // 2. file_name 不能包含 .part(表示中间状态) - // 3. 必须有 download_url - if (fn && !fn.includes('.part') && dl) { + // 2. file_name 开头必须是 user- (生成的图片) + // 3. file_name 不能包含 .part(表示中间状态) + // 4. 必须有 download_url + if (fn && fn.startsWith('user-') && !fn.includes('.part') && dl) { fileName = fn; downloadUrl = dl; logger.info('适配器', `图片生成完成: ${fn}`, meta); return true; } else { - logger.debug('适配器', `图片生成中: ${fn || '无文件名'}`, meta); + logger.debug('适配器', `图片生成中或非生成图片: ${fn || '无文件名'}`, meta); return false; } } catch { @@ -197,7 +197,7 @@ export const manifest = { // 模型列表 models: [ - { id: 'gpt-image-1', imagePolicy: 'optional' } + { id: 'gpt-image-1.5', imagePolicy: 'optional' } ], // 无需导航处理器 diff --git a/src/backend/adapter/chatgpt_text.js b/src/backend/adapter/chatgpt_text.js index ca449f7..3f55091 100644 --- a/src/backend/adapter/chatgpt_text.js +++ b/src/backend/adapter/chatgpt_text.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -39,9 +39,8 @@ async function selectModel(page, codeName, meta = {}) { } await modelSelectorBtn.waitFor({ timeout: 5000 }); - await sleep(300, 500); await safeClick(page, modelSelectorBtn, { bias: 'button' }); - await sleep(500, 800); + await sleep(300, 500); // 2. 检查是否有 Legacy models 选项 const legacyMenuItem = page.getByRole('menuitem', { name: /^Legacy models/ }); @@ -49,7 +48,7 @@ async function selectModel(page, codeName, meta = {}) { if (legacyExists > 0) { logger.debug('适配器', '发现 Legacy models 选项,正在点击...', meta); await safeClick(page, legacyMenuItem, { bias: 'button' }); - await sleep(500, 800); + await sleep(300, 500); } // 3. 查找匹配 codeName 开头的 menuitem @@ -58,13 +57,11 @@ async function selectModel(page, codeName, meta = {}) { if (targetExists > 0) { logger.info('适配器', `正在选择模型: ${codeName}`, meta); await safeClick(page, targetMenuItem, { bias: 'button' }); - await sleep(500, 1000); return true; } else { logger.debug('适配器', `未找到模型 ${codeName},使用默认模型`, meta); // 点击空白区域关闭菜单 await page.keyboard.press('Escape'); - await sleep(300, 500); return false; } } catch (e) { @@ -94,7 +91,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); // 2. 选择模型 const modelConfig = manifest.models.find(m => m.id === modelId); @@ -105,6 +101,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 3. 上传图片 (双击 Add files and more 按钮) if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); const expectedUploads = imgPaths.length; let uploadedCount = 0; let processedCount = 0; @@ -136,17 +133,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { return false; } }); - - await sleep(1000, 2000); } - // 4. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); - // 5. 点击发送 - logger.debug('适配器', '点击发送...', meta); + // 5. 发送提示词 + logger.debug('适配器', '发送提示词...', meta); await safeClick(page, sendBtnLocator, { bias: 'button' }); logger.info('适配器', '等待生成结果...', meta); @@ -230,7 +225,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } catch { return false; } - }, { timeout: 180000 }); + }, { timeout: 120000 }); } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; diff --git a/src/backend/adapter/deepseek_text.js b/src/backend/adapter/deepseek_text.js index 715c698..d460dcf 100644 --- a/src/backend/adapter/deepseek_text.js +++ b/src/backend/adapter/deepseek_text.js @@ -4,10 +4,10 @@ import { sleep, + humanType, safeClick } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -91,7 +91,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); // 2. 配置模型功能 (thinking / search) const modelConfig = manifest.models.find(m => m.id === modelId); @@ -99,19 +98,14 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await configureModel(page, modelConfig, meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); + await sleep(300, 500); - // 4. 按回车发送 - logger.debug('适配器', '按回车发送...', meta); - await page.keyboard.press('Enter'); - - logger.info('适配器', '等待生成结果...', meta); - - // 5. 监听 chat/completion SSE 流,解析文本内容 - logger.info('适配器', '监听 SSE 流获取文本...', meta); + // 4. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); let textContent = ''; let isComplete = false; @@ -119,127 +113,136 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { let currentFragmentIndex = -1; // 当前正在追加内容的 fragment 数组索引 let fragmentCount = 0; // fragments 数组的当前长度 - try { - await page.waitForResponse(async (response) => { - const url = response.url(); - if (!url.includes('chat/completion')) return false; - if (response.request().method() !== 'POST') return false; - if (response.status() !== 200) return false; + const responsePromise = page.waitForResponse(async (response) => { + const url = response.url(); + if (!url.includes('chat/completion')) return false; + if (response.request().method() !== 'POST') return false; + if (response.status() !== 200) return false; - try { - const body = await response.text(); - const lines = body.split('\n'); + try { + const body = await response.text(); + const lines = body.split('\n'); - for (const line of lines) { - // 跳过事件行和空行 - if (line.startsWith('event:') || !line.startsWith('data:')) continue; + for (const line of lines) { + // 跳过事件行和空行 + if (line.startsWith('event:') || !line.startsWith('data:')) continue; - const dataStr = line.slice(5).trim(); - if (!dataStr || dataStr === '{}') continue; + const dataStr = line.slice(5).trim(); + if (!dataStr || dataStr === '{}') continue; - try { - const data = JSON.parse(dataStr); + try { + const data = JSON.parse(dataStr); - // 初始响应中可能已有 fragments (如 SEARCH) - if (data.v?.response?.fragments && Array.isArray(data.v.response.fragments)) { - for (const fragment of data.v.response.fragments) { - const idx = fragmentCount++; - if (fragment.type === 'RESPONSE') { - responseFragmentIndex = idx; - currentFragmentIndex = idx; - if (fragment.content) { - textContent += fragment.content; - } - } else { - currentFragmentIndex = idx; + // 初始响应中可能已有 fragments (如 SEARCH) + if (data.v?.response?.fragments && Array.isArray(data.v.response.fragments)) { + for (const fragment of data.v.response.fragments) { + const idx = fragmentCount++; + if (fragment.type === 'RESPONSE') { + responseFragmentIndex = idx; + currentFragmentIndex = idx; + if (fragment.content) { + textContent += fragment.content; } + } else { + currentFragmentIndex = idx; } } + } - // 简单的文本追加 (只有 v 字符串,没有 p 和 o) - // 只有当前活跃的 fragment 是 RESPONSE 类型时才收集 - if (data.v && typeof data.v === 'string' && !data.p && !data.o) { - if (currentFragmentIndex === responseFragmentIndex && responseFragmentIndex >= 0) { + // 简单的文本追加 (只有 v 字符串,没有 p 和 o) + // 只有当前活跃的 fragment 是 RESPONSE 类型时才收集 + if (data.v && typeof data.v === 'string' && !data.p && !data.o) { + if (currentFragmentIndex === responseFragmentIndex && responseFragmentIndex >= 0) { + textContent += data.v; + } + } + + // 带路径的 APPEND 操作 (如 response/fragments/1/content) + if (data.o === 'APPEND' && data.p && typeof data.v === 'string') { + const match = data.p.match(/response\/fragments\/(\d+)\/content/); + if (match) { + const fragIdx = parseInt(match[1], 10); + currentFragmentIndex = fragIdx; + if (fragIdx === responseFragmentIndex) { textContent += data.v; } } - - // 带路径的 APPEND 操作 (如 response/fragments/1/content) - if (data.o === 'APPEND' && data.p && typeof data.v === 'string') { - const match = data.p.match(/response\/fragments\/(\d+)\/content/); - if (match) { - const fragIdx = parseInt(match[1], 10); - currentFragmentIndex = fragIdx; - if (fragIdx === responseFragmentIndex) { - textContent += data.v; - } - } - } - - // 不带操作符的路径设置 (如 {"v": "xxx", "p": "response/fragments/1/content"}) - if (data.p && typeof data.v === 'string' && !data.o) { - const match = data.p.match(/response\/fragments\/(\d+)\/content/); - if (match) { - const fragIdx = parseInt(match[1], 10); - currentFragmentIndex = fragIdx; - if (fragIdx === responseFragmentIndex) { - textContent += data.v; - } - } - } - - // fragments APPEND - 新增 fragment (非 BATCH) - if (data.p === 'response/fragments' && data.o === 'APPEND' && Array.isArray(data.v)) { - for (const fragment of data.v) { - const idx = fragmentCount++; - if (fragment.type === 'RESPONSE') { - responseFragmentIndex = idx; - currentFragmentIndex = idx; - if (fragment.content) { - textContent += fragment.content; - } - } else { - // THINK 或 SEARCH - currentFragmentIndex = idx; - } - } - } - - // BATCH 操作中的 fragments - if (data.o === 'BATCH' && data.p === 'response' && Array.isArray(data.v)) { - for (const item of data.v) { - // fragments 追加 - if (item.p === 'fragments' && item.o === 'APPEND' && Array.isArray(item.v)) { - for (const fragment of item.v) { - const idx = fragmentCount++; - if (fragment.type === 'RESPONSE') { - responseFragmentIndex = idx; - currentFragmentIndex = idx; - if (fragment.content) { - textContent += fragment.content; - } - } else { - // THINK 或 SEARCH - currentFragmentIndex = idx; - } - } - } - // 检查是否完成 - if (item.p === 'status' && item.v === 'FINISHED') { - isComplete = true; - } - } - } - } catch { - // 忽略解析错误 } - } - return isComplete; - } catch { - return false; + // 不带操作符的路径设置 (如 {"v": "xxx", "p": "response/fragments/1/content"}) + if (data.p && typeof data.v === 'string' && !data.o) { + const match = data.p.match(/response\/fragments\/(\d+)\/content/); + if (match) { + const fragIdx = parseInt(match[1], 10); + currentFragmentIndex = fragIdx; + if (fragIdx === responseFragmentIndex) { + textContent += data.v; + } + } + } + + // fragments APPEND - 新增 fragment (非 BATCH) + if (data.p === 'response/fragments' && data.o === 'APPEND' && Array.isArray(data.v)) { + for (const fragment of data.v) { + const idx = fragmentCount++; + if (fragment.type === 'RESPONSE') { + responseFragmentIndex = idx; + currentFragmentIndex = idx; + if (fragment.content) { + textContent += fragment.content; + } + } else { + // THINK 或 SEARCH + currentFragmentIndex = idx; + } + } + } + + // BATCH 操作中的 fragments + if (data.o === 'BATCH' && data.p === 'response' && Array.isArray(data.v)) { + for (const item of data.v) { + // fragments 追加 + if (item.p === 'fragments' && item.o === 'APPEND' && Array.isArray(item.v)) { + for (const fragment of item.v) { + const idx = fragmentCount++; + if (fragment.type === 'RESPONSE') { + responseFragmentIndex = idx; + currentFragmentIndex = idx; + if (fragment.content) { + textContent += fragment.content; + } + } else { + // THINK 或 SEARCH + currentFragmentIndex = idx; + } + } + } + // 检查是否完成 + if (item.p === 'status' && item.v === 'FINISHED') { + isComplete = true; + } + } + } + } catch { + // 忽略解析错误 + } } - }, { timeout: 180000 }); + + return isComplete; + } catch { + return false; + } + }, { timeout: 120000 }); + + // 5. 发送提示词 + logger.debug('适配器', '发送提示词...', meta); + await page.keyboard.press('Enter'); + + logger.info('适配器', '等待生成结果...', meta); + + // 6. 等待 API 响应 + try { + await responsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; @@ -259,7 +262,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 顶层错误处理 const pageError = normalizePageError(err, meta); if (pageError) return pageError; - logger.error('适配器', '生成任务失败', { ...meta, error: err.message }); return { error: `生成任务失败: ${err.message}` }; } finally { diff --git a/src/backend/adapter/doubao.js b/src/backend/adapter/doubao.js index 3c2ad7c..b1f5b9c 100644 --- a/src/backend/adapter/doubao.js +++ b/src/backend/adapter/doubao.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -39,30 +39,27 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { try { logger.info('适配器', '开启新会话...', meta); await gotoWithCheck(page, TARGET_URL); - await sleep(1500, 2500); // 1. 点击进入图片生成模式 logger.debug('适配器', '进入图片生成模式...', meta); const skillBtn = page.locator('button[data-testid="skill_bar_button_3"]'); await skillBtn.waitFor({ state: 'visible', timeout: 30000 }); await safeClick(page, skillBtn, { bias: 'button' }); - await sleep(1000, 1500); // 2. 选择模型 logger.debug('适配器', `选择模型: ${codeName}...`, meta); const modelBtn = page.locator('button[data-testid="image-creation-chat-input-picture-model-button"]'); await modelBtn.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, modelBtn, { bias: 'button' }); - await sleep(500, 800); + await sleep(300, 500); const modelOption = page.getByRole('menuitem', { name: codeName }); await modelOption.waitFor({ state: 'visible', timeout: 5000 }); await safeClick(page, modelOption, { bias: 'button' }); - await sleep(500, 800); // 3. 上传参考图片 (如果有) if (imgPaths && imgPaths.length > 0) { - logger.info('适配器', `开始上传 ${imgPaths.length} 张参考图片...`, meta); + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); const uploadBtn = page.locator('button[data-testid="image-creation-chat-input-picture-reference-button"]'); await uploadBtn.waitFor({ state: 'visible', timeout: 10000 }); @@ -76,15 +73,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } }); - logger.info('适配器', '参考图片上传完成', meta); - await sleep(1000, 1500); + logger.info('适配器', '图片上传完成', meta); } // 4. 填写提示词 const inputLocator = page.locator('div[data-testid="chat_input_input"][role="textbox"]'); await waitForInput(page, inputLocator, { click: true }); - await fillPrompt(page, inputLocator, prompt, meta); - await sleep(500, 1000); + await humanType(page, inputLocator, prompt); // 5. 设置 SSE 监听 logger.debug('适配器', '启动 SSE 监听...', meta); diff --git a/src/backend/adapter/doubao_text.js b/src/backend/adapter/doubao_text.js index 032ecdb..8847b6c 100644 --- a/src/backend/adapter/doubao_text.js +++ b/src/backend/adapter/doubao_text.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -37,21 +37,19 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { try { logger.info('适配器', '开启新会话...', meta); await gotoWithCheck(page, TARGET_URL); - await sleep(1500, 2500); // 1. 等待输入框加载 const inputLocator = page.locator('textarea[data-testid="chat_input_input"]'); await waitForInput(page, inputLocator, { click: false }); - await sleep(500, 1000); // 2. 上传图片 (如果有) if (imgPaths && imgPaths.length > 0) { logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); // 点击上传菜单按钮 - const uploadMenuBtn = page.locator('button[aria-haspopup="menu"]').first(); + const uploadMenuBtn = page.locator('main button[aria-haspopup="menu"]').first(); await safeClick(page, uploadMenuBtn, { bias: 'button' }); - await sleep(500, 1000); + await sleep(300, 500); // 点击上传文件选项 const uploadItem = page.locator('div[data-testid="upload_file_panel_upload_item"][role="menuitem"]'); @@ -65,7 +63,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { }); logger.info('适配器', '图片上传完成', meta); - await sleep(1000, 1500); } // 3. 切换深度思考模式 (如需) @@ -78,18 +75,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { if (useThinking && !isChecked) { logger.debug('适配器', '启用深度思考模式...', meta); await safeClick(page, deepThinkBtn, { bias: 'button' }); - await sleep(500, 800); } else if (!useThinking && isChecked) { logger.debug('适配器', '关闭深度思考模式...', meta); await safeClick(page, deepThinkBtn, { bias: 'button' }); - await sleep(500, 800); } } // 4. 填写提示词 await safeClick(page, inputLocator, { bias: 'input' }); - await fillPrompt(page, inputLocator, prompt, meta); - await sleep(500, 1000); + await humanType(page, inputLocator, prompt); // 5. 设置 SSE 监听 logger.debug('适配器', '启动 SSE 监听...', meta); diff --git a/src/backend/adapter/gemini.js b/src/backend/adapter/gemini.js index 09e2203..4415961 100644 --- a/src/backend/adapter/gemini.js +++ b/src/backend/adapter/gemini.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, normalizeHttpError, moveMouseAway, @@ -43,15 +43,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, inputLocator, { click: false }); - await sleep(1500, 2500); - // 2. 上传图片 (使用 filechooser 事件,因为 Firefox 不会创建 DOM input 元素) + // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { - // 点击加号按钮打开菜单 + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); logger.debug('适配器', '点击加号按钮...', meta); const uploadMenuBtn = page.getByRole('button', { name: 'Open upload file menu' }); await safeClick(page, uploadMenuBtn, { bias: 'button' }); - await sleep(500, 1000); // 使用公共函数上传文件 const uploadFilesBtn = page.getByRole('button', { name: /Upload files/ }); @@ -63,20 +61,18 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { url.includes('upload_id='); } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, inputLocator, { bias: 'input' }); - await fillPrompt(page, inputLocator, prompt, meta); - await sleep(500, 1000); + await humanType(page, inputLocator, prompt); // 4. 点击 Tools 按钮启用图片/视频生成 logger.debug('适配器', '点击 Tools 按钮...', meta); const toolsBtn = page.getByRole('button', { name: 'Tools' }); await safeClick(page, toolsBtn, { bias: 'button' }); - await sleep(500, 1000); // 检测是否是视频模型 const isVideoModel = modelId && modelId.startsWith('veo-'); @@ -99,23 +95,26 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const createImagesBtn = page.getByRole('button', { name: 'Create images' }); await safeClick(page, createImagesBtn, { bias: 'button' }); } - await sleep(500, 1000); - // 6. 点击发送 - logger.debug('适配器', '点击发送...', meta); + // 6. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const streamApiResponsePromise = waitApiResponse(page, { + urlMatch: 'assistant.lamda.BardFrontendService/StreamGenerate', + method: 'POST', + timeout: 120000, + meta + }); + + // 7. 发送提示词 + logger.info('适配器', '发送提示词...', meta); await safeClick(page, sendBtnLocator, { bias: 'button' }); logger.info('适配器', '等待生成结果...', meta); - // 7. 等待 StreamGenerate API + // 8. 等待 StreamGenerate API let streamApiResponse; try { - streamApiResponse = await waitApiResponse(page, { - urlMatch: 'assistant.lamda.BardFrontendService/StreamGenerate', - method: 'POST', - timeout: 120000, - meta - }); + streamApiResponse = await streamApiResponsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; diff --git a/src/backend/adapter/gemini_biz.js b/src/backend/adapter/gemini_biz.js index 58e71cb..c169781 100644 --- a/src/backend/adapter/gemini_biz.js +++ b/src/backend/adapter/gemini_biz.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, normalizePageError, normalizeHttpError, waitApiResponse, @@ -111,7 +110,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 开启新对话 - 先等待可能正在进行的登录处理完成 await waitForPageAuth(page); - logger.info('适配器', '开启新会话', meta); await gotoWithCheck(page, targetUrl); @@ -121,42 +119,24 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 logger.debug('适配器', '正在寻找输入框...', meta); await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); - // 2. 上传图片 (uploadImages - 使用自定义验证器) + // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { - const expectedUploads = imgPaths.length; - let uploadedCount = 0; - let metadataCount = 0; - + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); await pasteImages(page, INPUT_SELECTOR, imgPaths, { uploadValidator: (response) => { const url = response.url(); - if (response.status() === 200) { - if (url.includes('global/widgetAddContextFile')) { - uploadedCount++; - logger.debug('适配器', `图片上传进度 (Add): ${uploadedCount}/${expectedUploads}`, meta); - return false; - } else if (url.includes('global/widgetListSessionFileMetadata')) { - metadataCount++; - logger.info('适配器', `图片上传进度: ${metadataCount}/${expectedUploads}`, meta); - - if (uploadedCount >= expectedUploads && metadataCount >= expectedUploads) { - return true; - } - } - } - return false; + // 只追踪 widgetAddContextFile 请求,每个请求代表一张图片上传 + return response.status() === 200 && url.includes('global/widgetAddContextFile'); } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 (fillPrompt) + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 设置拦截器 logger.debug('适配器', '已启用请求拦截', meta); @@ -191,26 +171,26 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await route.continue(); }); - // 5. 提交 - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'md-icon-button.send-button.submit, button[aria-label="提交"], button[aria-label="Send"], .send-button', - inputTarget: INPUT_SELECTOR, + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const apiResponsePromise = waitApiResponse(page, { + urlMatch: 'global/widgetStreamAssist', + method: 'POST', + timeout: 120000, + errorText: ['modelArmorViolation'], meta }); + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, 'md-icon-button.send-button.submit, button[aria-label="Send"], .send-button', { bias: 'button' }); + logger.info('适配器', '等待生成结果中...', meta); - // 6. 等待 API 响应 + // 7. 等待 API 响应 let apiResponse; try { - apiResponse = await waitApiResponse(page, { - urlMatch: 'global/widgetStreamAssist', - method: 'POST', - timeout: 120000, - errorText: ['modelArmorViolation'], - meta - }); + apiResponse = await apiResponsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; diff --git a/src/backend/adapter/gemini_biz_text.js b/src/backend/adapter/gemini_biz_text.js index aa17cd3..89bbdd0 100644 --- a/src/backend/adapter/gemini_biz_text.js +++ b/src/backend/adapter/gemini_biz_text.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, normalizePageError, normalizeHttpError, waitApiResponse, @@ -110,7 +109,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 开启新对话 - 先等待可能正在进行的登录处理完成 await waitForPageAuth(page); - logger.info('适配器', '开启新会话', meta); await gotoWithCheck(page, targetUrl); @@ -120,42 +118,24 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 logger.debug('适配器', '正在寻找输入框...', meta); await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); - // 2. 上传图片 (uploadImages - 使用自定义验证器) + // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { - const expectedUploads = imgPaths.length; - let uploadedCount = 0; - let metadataCount = 0; - + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); await pasteImages(page, INPUT_SELECTOR, imgPaths, { uploadValidator: (response) => { const url = response.url(); - if (response.status() === 200) { - if (url.includes('global/widgetAddContextFile')) { - uploadedCount++; - logger.debug('适配器', `图片上传进度 (Add): ${uploadedCount}/${expectedUploads}`, meta); - return false; - } else if (url.includes('global/widgetListSessionFileMetadata')) { - metadataCount++; - logger.info('适配器', `图片上传进度: ${metadataCount}/${expectedUploads}`, meta); - - if (uploadedCount >= expectedUploads && metadataCount >= expectedUploads) { - return true; - } - } - } - return false; + // 只追踪 widgetAddContextFile 请求,每个请求代表一张图片上传 + return response.status() === 200 && url.includes('global/widgetAddContextFile'); } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 (fillPrompt) + // 3. 输入提示词 await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + logger.info('适配器', '输入提示词...', meta); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 设置请求拦截器(根据模型类型修改请求) logger.debug('适配器', '已启用请求拦截', meta); @@ -163,7 +143,10 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 判断是否为 grounding 模式 const isGrounding = modelId.endsWith('-grounding'); - const actualModelId = isGrounding ? modelId.replace('-grounding', '') : modelId; + // 从 models 列表中查找对应的 codeName + const modelConfig = manifest.models.find(m => m.id === modelId); + const baseCodeName = modelConfig?.codeName || modelId; + const actualModelId = isGrounding ? baseCodeName : baseCodeName; await page.route(url => url.href.includes('global/widgetStreamAssist'), async (route) => { const request = route.request(); @@ -198,26 +181,26 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await route.continue(); }); - // 5. 提交 (submit - 使用公共函数) - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'md-icon-button.send-button.submit, button[aria-label="提交"], button[aria-label="Send"], .send-button', - inputTarget: INPUT_SELECTOR, + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const apiResponsePromise = waitApiResponse(page, { + urlMatch: 'global/widgetStreamAssist', + method: 'POST', + timeout: 120000, + errorText: ['modelArmorViolation'], meta }); + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, 'md-icon-button.send-button.submit, button[aria-label="Send"], .send-button', { bias: 'button' }); + logger.info('适配器', '等待生成结果中...', meta); - // 6. 等待 API 响应 + // 7. 等待 API 响应 let apiResponse; try { - apiResponse = await waitApiResponse(page, { - urlMatch: 'global/widgetStreamAssist', - method: 'POST', - timeout: 120000, - errorText: ['modelArmorViolation'], - meta - }); + apiResponse = await apiResponsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; @@ -330,14 +313,14 @@ export const manifest = { // 模型列表 models: [ - { id: 'gemini-3-pro', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-2.5-pro', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-3-flash-preview', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-2.5-flash', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-3-pro-grounding', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-2.5-pro-grounding', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-2.5-flash-grounding', imagePolicy: 'optional', type: 'text' }, - { id: 'gemini-3-flash-preview-grounding', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-3-pro', codeName: 'gemini-3-pro-preview', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-2.5-pro', codeName: 'gemini-2.5pro', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-3-flash-preview', codeName: 'gemini-3-pro-preview', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-2.5-flash', codeName: 'gemini-2.5-flash', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-3-pro-grounding', codeName: 'gemini-3-pro-preview', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-2.5-pro-grounding', codeName: 'gemini-2.5-pro', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-2.5-flash-grounding', codeName: 'gemini-2.5-flash', imagePolicy: 'optional', type: 'text' }, + { id: 'gemini-3-flash-preview-grounding', codeName: 'gemini-3-flash-preview', imagePolicy: 'optional', type: 'text' }, ], // 导航处理器 diff --git a/src/backend/adapter/gemini_text.js b/src/backend/adapter/gemini_text.js index 4167bd3..ecef223 100644 --- a/src/backend/adapter/gemini_text.js +++ b/src/backend/adapter/gemini_text.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, normalizeHttpError, moveMouseAway, @@ -41,14 +41,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, inputLocator, { click: false }); - await sleep(1500, 2500); // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片...`, meta); logger.debug('适配器', '点击加号按钮...', meta); const uploadMenuBtn = page.getByRole('button', { name: 'Open upload file menu' }); await safeClick(page, uploadMenuBtn, { bias: 'button' }); - await sleep(500, 1000); const uploadFilesBtn = page.getByRole('button', { name: /Upload files/ }); await uploadFilesViaChooser(page, uploadFilesBtn, imgPaths, { @@ -59,16 +58,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { url.includes('upload_id='); } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, inputLocator, { bias: 'input' }); - await fillPrompt(page, inputLocator, prompt, meta); - await sleep(500, 1000); + await humanType(page, inputLocator, prompt); - // 4. 选择模型(如果指定了 modelId) + // 4. 选择模型 if (modelId) { try { logger.debug('适配器', `准备选择模型: ${modelId}`, meta); @@ -83,11 +81,11 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await page.keyboard.press('Tab'); await sleep(100, 200); await page.keyboard.press('Tab'); - await sleep(200, 300); + await sleep(100, 200); // 按回车打开模型菜单 await page.keyboard.press('Enter'); - await sleep(500, 800); + await sleep(300, 500); // 获取所有 menuitemradio 选项 const menuItems = await page.getByRole('menuitemradio').all(); @@ -146,8 +144,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 按 Escape 关闭菜单 await page.keyboard.press('Escape'); } - - await sleep(300, 500); } } catch (e) { logger.warn('适配器', `模型选择失败: ${e.message},继续使用默认模型`, meta); @@ -158,21 +154,25 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } } - // 5. 点击发送 - logger.debug('适配器', '点击发送...', meta); + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const apiResponsePromise = waitApiResponse(page, { + urlMatch: 'assistant.lamda.BardFrontendService/StreamGenerate', + method: 'POST', + timeout: 120000, + meta + }); + + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); await safeClick(page, sendBtnLocator, { bias: 'button' }); logger.info('适配器', '等待生成结果...', meta); - // 5. 等待 API 响应 + // 7. 等待 API 响应 let apiResponse; try { - apiResponse = await waitApiResponse(page, { - urlMatch: 'assistant.lamda.BardFrontendService/StreamGenerate', - method: 'POST', - timeout: 120000, - meta - }); + apiResponse = await apiResponsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; diff --git a/src/backend/adapter/google_flow.js b/src/backend/adapter/google_flow.js index 6c5aec4..2f166b5 100644 --- a/src/backend/adapter/google_flow.js +++ b/src/backend/adapter/google_flow.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -68,14 +68,12 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 导航到入口页面 logger.info('适配器', '开启新会话...', meta); await gotoWithCheck(page, TARGET_URL); - await sleep(1500, 2500); - // 2. 创建项目 - 点击 add_2 按钮 + // 2. 创建项目 logger.debug('适配器', '创建新项目...', meta); const addProjectBtn = page.getByRole('button', { name: /^add_2/ }); await addProjectBtn.waitFor({ state: 'visible', timeout: 30000 }); await safeClick(page, addProjectBtn, { bias: 'button' }); - await sleep(1000, 1500); // 3. 选择 Images 模式 (通过 combobox + option 选择) logger.debug('适配器', '选择图片制作模式...', meta); @@ -84,20 +82,18 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { }); await modeCombo.first().waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, modeCombo.first(), { bias: 'button' }); - await sleep(500, 800); const imageOption = page.getByRole('option').filter({ has: page.locator('i', { hasText: 'add_photo_alternate' }) }); await safeClick(page, imageOption.first(), { bias: 'button' }); - await sleep(1000, 1500); // 4. 打开 Tune 菜单进行配置 logger.debug('适配器', '打开设置菜单...', meta); const tuneBtn = page.getByRole('button', { name: /^tune/ }); await tuneBtn.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, tuneBtn, { bias: 'button' }); - await sleep(800, 1200); + await sleep(300, 500); // 4.1 设置生成数量为 1 (链式 filter:包含数字1-4,排除模型和尺寸关键词) logger.debug('适配器', '设置生成数量为 1...', meta); @@ -110,7 +106,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await safeClick(page, countCombobox.first(), { bias: 'button' }); await sleep(300, 500); await safeClick(page, page.getByRole('option', { name: '1' }), { bias: 'button' }); - await sleep(300, 500); logger.debug('适配器', '生成数量已设置为 1', meta); } else { logger.warn('适配器', '未找到数量选择 combobox,跳过', meta); @@ -125,7 +120,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await safeClick(page, modelCombobox.first(), { bias: 'button' }); await sleep(300, 500); await safeClick(page, page.getByRole('option', { name: codeName, exact: true }), { bias: 'button' }); - await sleep(300, 500); logger.debug('适配器', `模型已设置为 ${codeName}`, meta); } @@ -139,7 +133,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await sleep(300, 500); const sizeOption = page.getByRole('option').filter({ hasText: imageSize }); await safeClick(page, sizeOption.first(), { bias: 'button' }); - await sleep(300, 500); logger.debug('适配器', `尺寸已设置为 ${imageSize}`, meta); } @@ -152,15 +145,14 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { logger.debug('适配器', `上传图片 ${i + 1}/${imgPaths.length}...`, meta); // 5.1 点击 add 按钮 + await sleep(300, 500); const addBtn = page.getByRole('button', { name: 'add' }); await addBtn.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, addBtn, { bias: 'button' }); - await sleep(500, 1000); // 5.2 点击 upload 按钮并选择文件(不等待上传完成) const uploadBtn = page.getByRole('button', { name: /^upload/ }); await uploadFilesViaChooser(page, uploadBtn, [imgPath]); - await sleep(500, 1000); // 5.3 先启动上传监听,再点击 crop 按钮 const uploadResponsePromise = waitApiResponse(page, { @@ -176,18 +168,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 5.4 等待上传完成 await uploadResponsePromise; logger.info('适配器', `图片 ${i + 1} 上传完成`, meta); - await sleep(1000, 1500); } - logger.info('适配器', '所有图片上传完成', meta); + logger.info('适配器', '图片上传完成', meta); } // 6. 输入提示词 logger.info('适配器', '输入提示词...', meta); const textarea = page.locator('textarea[placeholder]'); await waitForInput(page, textarea, { click: true }); - await fillPrompt(page, textarea, prompt, meta); - await sleep(500, 1000); + await humanType(page, textarea, prompt); // 7. 先启动 API 监听,再点击发送 logger.debug('适配器', '启动 API 监听...', meta); @@ -198,8 +188,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { meta }); - // 8. 点击发送按钮 - logger.info('适配器', '点击发送...', meta); + // 8. 发送提示词 + logger.info('适配器', '发送提示词...', meta); const sendBtn = page.getByRole('button', { name: /^arrow_forward/ }); await sendBtn.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, sendBtn, { bias: 'button' }); diff --git a/src/backend/adapter/lmarena.js b/src/backend/adapter/lmarena.js index 5117d63..b30fe59 100644 --- a/src/backend/adapter/lmarena.js +++ b/src/backend/adapter/lmarena.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, waitApiResponse, normalizePageError, normalizeHttpError, @@ -56,19 +55,14 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { const { page, config } = context; const textareaSelector = 'textarea'; - // Worker 已验证,直接解析模型配置 - //const modelConfig = manifest.models.find(m => m.id === modelId); - //const codeName = modelConfig?.codeName; - try { logger.info('适配器', '开启新会话...', meta); await gotoWithCheck(page, TARGET_URL); // 1. 等待输入框加载 await waitForInput(page, textareaSelector, { click: false }); - await sleep(1500, 2500); - // 2. 选择模型(必须在上传图片之前,因为能否上传图片取决于模型 imagePolicy) + // 2. 选择模型 if (modelId) { logger.debug('适配器', `选择模型: ${modelId}`, meta); const modelCombobox = page.locator('#chat-area') @@ -77,45 +71,53 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await modelCombobox.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, modelCombobox, { bias: 'button' }); - await sleep(500, 800); - // 模拟粘贴输入模型 ID 并回车 + // 模拟粘贴输入模型 ID await page.evaluate((text) => { document.execCommand('insertText', false, text); }, modelId); - await sleep(300, 500); + + // 等待下拉选项出现后再按回车 + try { + await page.waitForSelector('[role="option"]', { timeout: 5000 }); + } catch { + // 超时也继续,可能选项已经存在 + } + await sleep(200, 300); await page.keyboard.press('Enter'); - await sleep(500, 800); } - // 3. 上传图片 (uploadImages) + // 3. 上传图片 if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片`, meta); await pasteImages(page, textareaSelector, imgPaths); + logger.info('适配器', '图片上传完成', meta); } - // 4. 填写提示词 (fillPrompt) + // 4. 输入提示词 await safeClick(page, textareaSelector, { bias: 'input' }); - await fillPrompt(page, textareaSelector, prompt, meta); + logger.info('适配器', '输入提示词...', meta); + await humanType(page, textareaSelector, prompt); - // 5. 提交表单 (submit) - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'button[type="submit"]', - inputTarget: textareaSelector, + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const responsePromise = waitApiResponse(page, { + urlMatch: '/nextjs-api/stream', + method: 'POST', + timeout: 120000, meta }); + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, 'button[type="submit"]', { bias: 'button' }); + logger.info('适配器', '等待生成结果...', meta); - // 6. 等待 API 响应 (waitApiResponse) + // 7. 等待 API 响应 let response; try { - response = await waitApiResponse(page, { - urlMatch: '/nextjs-api/stream', - method: 'POST', - timeout: 120000, - meta - }); + response = await responsePromise; } catch (e) { // 使用公共错误处理 const pageError = normalizePageError(e, meta); @@ -126,7 +128,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 7. 解析响应结果 const content = await response.text(); - // 8. 检查 HTTP 错误 (normalizeHttpError) + // 8. 检查 HTTP 错误 const httpError = normalizeHttpError(response, content); if (httpError) { logger.error('适配器', `请求生成时返回错误: ${httpError.error}`, meta); diff --git a/src/backend/adapter/lmarena_text.js b/src/backend/adapter/lmarena_text.js index e40c0ab..7676a2e 100644 --- a/src/backend/adapter/lmarena_text.js +++ b/src/backend/adapter/lmarena_text.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, waitApiResponse, normalizePageError, normalizeHttpError, @@ -33,12 +32,12 @@ const TARGET_URL_SEARCH = 'https://lmarena.ai/zh/c/new?mode=direct&chat-modality * @returns {Promise<{image?: string, text?: string, error?: string}>} 生成结果 */ async function generate(context, prompt, imgPaths, modelId, meta = {}) { - const { page, config } = context; + const { page } = context; const textareaSelector = 'textarea'; // Worker 已验证,直接解析模型配置 const modelConfig = manifest.models.find(m => m.id === modelId); - const { codeName, search } = modelConfig || {}; + const { search } = modelConfig || {}; const targetUrl = search ? TARGET_URL_SEARCH : TARGET_URL; try { @@ -47,9 +46,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, textareaSelector, { click: false }); - await sleep(1500, 2500); - // 2. 选择模型(必须在上传图片之前,因为能否上传图片取决于模型 imagePolicy) + // 2. 选择模型 if (modelId) { logger.debug('适配器', `选择模型: ${modelId}`, meta); const modelCombobox = page.locator('#chat-area') @@ -58,45 +56,53 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await modelCombobox.waitFor({ state: 'visible', timeout: 10000 }); await safeClick(page, modelCombobox, { bias: 'button' }); - await sleep(500, 800); - // 模拟粘贴输入模型 ID 并回车 + // 模拟粘贴输入模型 ID await page.evaluate((text) => { document.execCommand('insertText', false, text); }, modelId); - await sleep(300, 500); + + // 等待下拉选项出现后再按回车 + try { + await page.waitForSelector('[role="option"]', { timeout: 5000 }); + } catch { + // 超时也继续,可能选项已经存在 + } + await sleep(200, 300); await page.keyboard.press('Enter'); - await sleep(500, 800); } - // 3. 上传图片 (uploadImages) + // 3. 上传图片 if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片`, meta); await pasteImages(page, textareaSelector, imgPaths); + logger.info('适配器', '图片上传完成', meta); } - // 4. 填写提示词 (fillPrompt) + // 4. 填写提示词 await safeClick(page, textareaSelector, { bias: 'input' }); - await fillPrompt(page, textareaSelector, prompt, meta); + logger.info('适配器', '输入提示词...', meta); + await humanType(page, textareaSelector, prompt); - // 5. 提交表单 (submit) - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'button[type="submit"]', - inputTarget: textareaSelector, + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const responsePromise = waitApiResponse(page, { + urlMatch: '/nextjs-api/stream', + method: 'POST', + timeout: 120000, meta }); + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, 'button[type="submit"]', { bias: 'button' }); + logger.info('适配器', '等待生成结果...', meta); - // 6. 等待 API 响应 (waitApiResponse) + // 7. 等待 API 响应 let response; try { - response = await waitApiResponse(page, { - urlMatch: '/nextjs-api/stream', - method: 'POST', - timeout: 120000, - meta - }); + response = await responsePromise; } catch (e) { // 使用公共错误处理 const pageError = normalizePageError(e, meta); diff --git a/src/backend/adapter/nanobananafree_ai.js b/src/backend/adapter/nanobananafree_ai.js index 3e4cbed..80cbefe 100644 --- a/src/backend/adapter/nanobananafree_ai.js +++ b/src/backend/adapter/nanobananafree_ai.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, waitApiResponse, normalizePageError, normalizeHttpError, @@ -42,40 +41,43 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, textareaSelector, { click: false }); - await sleep(1500, 2500); + //await sleep(1500, 2500); - // 2. 上传图片 (uploadImages - 仅取第一张) + // 2. 上传图片 (仅取第一张) if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片`, meta); const singleImage = [imgPaths[0]]; if (imgPaths.length > 1) { logger.warn('适配器', `此后端仅支持1张图片, 已丢弃 ${imgPaths.length - 1} 张`, meta); } await pasteImages(page, textareaSelector, singleImage); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 (fillPrompt) + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, textareaSelector, { bias: 'input' }); - await fillPrompt(page, textareaSelector, prompt, meta); + await humanType(page, textareaSelector, prompt); - // 4. 提交表单 (submit) - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'div[class*="_sendButton_"]', - inputTarget: textareaSelector, + // 4. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const responsePromise = waitApiResponse(page, { + urlMatch: 'v1/generateContent', + method: 'POST', + timeout: 120000, meta }); + // 5. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, 'div[class*="_sendButton_"]', { bias: 'button' }); + logger.info('适配器', '等待生成结果...', meta); - // 5. 等待 API 响应 (waitApiResponse) + // 6. 等待 API 响应 let response; try { - response = await waitApiResponse(page, { - urlMatch: 'v1/generateContent', - method: 'POST', - timeout: 120000, - meta - }); + response = await responsePromise; } catch (e) { // 使用公共错误处理 const pageError = normalizePageError(e, meta); diff --git a/src/backend/adapter/sora.js b/src/backend/adapter/sora.js index 715b1df..a3fce8b 100644 --- a/src/backend/adapter/sora.js +++ b/src/backend/adapter/sora.js @@ -4,11 +4,11 @@ import { sleep, + humanType, safeClick, uploadFilesViaChooser } from '../engine/utils.js'; import { - fillPrompt, normalizePageError, moveMouseAway, waitForInput, @@ -33,12 +33,6 @@ const INPUT_SELECTOR = 'textarea'; async function generate(context, prompt, imgPaths, modelId, meta = {}) { const { page } = context; - // 只使用第一张图片 - const singleImgPath = imgPaths && imgPaths.length > 0 ? [imgPaths[0]] : []; - if (imgPaths && imgPaths.length > 1) { - logger.warn('适配器', `Sora 只支持一张图片,已丢弃 ${imgPaths.length - 1} 张`, meta); - } - // 用于存储任务 ID 和视频 URL let taskId = null; let videoUrl = null; @@ -49,10 +43,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); - // 2. 上传图片 (如果有) - if (singleImgPath.length > 0) { + // 2. 上传图片 (仅取第一张) + if (imgPaths && imgPaths.length > 0) { + logger.info('适配器', `开始上传 ${imgPaths.length} 张图片`, meta); + const singleImgPath = [imgPaths[0]]; + if (imgPaths.length > 1) { + logger.warn('适配器', `此后端仅支持1张图片,已丢弃 ${imgPaths.length - 1} 张`, meta); + } + logger.debug('适配器', '点击上传文件按钮...', meta); const attachBtn = page.getByRole('button', { name: 'Attach media' }); @@ -60,20 +59,18 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { uploadValidator: (response) => { const url = response.url(); if (response.status() === 200 && url.includes('project_y/file/upload')) { - logger.info('适配器', '图片上传完成', meta); return true; } return false; } }); - - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 提前设置响应监听器 (drafts 接口) // 因为 drafts 请求在 pending/v2 检测到任务消失后立即出现,需要提前监听 @@ -89,7 +86,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { }; // 5. 点击 Create video 按钮并监听 nf/create 请求 - logger.debug('适配器', '点击创建视频...', meta); + logger.debug('适配器', '设置监听器视频...', meta); const createBtn = page.getByRole('button', { name: 'Create video' }); // 设置 create 请求监听 @@ -101,6 +98,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { return true; }, { timeout: 60000 }); + // 发送提示词 + logger.info('适配器', '发送提示词...', meta); await safeClick(page, createBtn, { bias: 'button' }); // 等待 create 响应 diff --git a/src/backend/adapter/zai_is.js b/src/backend/adapter/zai_is.js index 95155a2..e43e2e0 100644 --- a/src/backend/adapter/zai_is.js +++ b/src/backend/adapter/zai_is.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, normalizePageError, normalizeHttpError, waitApiResponse, @@ -49,14 +48,12 @@ async function handleDiscordAuth(page) { try { // 等待页面加载完成,点击唯一的 button 标签 await page.waitForSelector('button', { timeout: 30000 }); - await sleep(1000, 1500); await safeClick(page, 'button', { bias: 'button' }); logger.info('适配器', '[登录器(zai_is)] 已点击登录按钮,等待跳转到 Discord...'); // 2. 等待跳转到 Discord OAuth2 授权页面 await page.waitForURL(url => url.href.includes('discord.com/oauth2/authorize'), { timeout: 60000 }); logger.info('适配器', '[登录器(zai_is)] 已到达 Discord 授权页面'); - await sleep(2000, 3000); // 3. 使用鼠标滚轮滚动 main 元素,直到授权按钮可用 // 授权按钮选择器: data-align="stretch" 的 div 中的最后一个按钮 (授权按钮在右边) @@ -68,7 +65,7 @@ async function handleDiscordAuth(page) { const isDisabled = await authorizeBtn.evaluate(el => el.disabled).catch(() => true); if (!isDisabled) { logger.info('适配器', '[登录器(zai_is)] 授权按钮已可用,正在点击...'); - await sleep(500, 1000); + await sleep(300, 500); await safeClick(page, authorizeBtn, { bias: 'button' }); break; } @@ -83,7 +80,6 @@ async function handleDiscordAuth(page) { await page.mouse.wheel(0, 200); } } - await sleep(800, 1200); } // 4. 等待跳转回 zai.is (不包含 auth 和 discord) @@ -96,7 +92,7 @@ async function handleDiscordAuth(page) { }, { timeout: 60000 }); logger.info('适配器', '[登录器(zai_is)] Discord 登录完成'); - await sleep(2000, 3000); + await sleep(500, 1000); unlockPageAuth(page); return true; } catch (err) { @@ -135,23 +131,33 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 logger.debug('适配器', '正在寻找输入框...', meta); await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); // 2. 上传图片 if (imgPaths && imgPaths.length > 0) { + const expectedUploads = imgPaths.length; + let uploadedCount = 0; + + logger.info('适配器', `开始上传 ${expectedUploads} 张图片`, meta); await pasteImages(page, INPUT_SELECTOR, imgPaths, { uploadValidator: (response) => { const url = response.url(); - return response.status() === 200 && url.includes('v1/files'); + if (response.status() === 200 && url.includes('v1/files')) { + uploadedCount++; + logger.info('适配器', `图片上传进度: ${uploadedCount}/${expectedUploads}`, meta); + if (uploadedCount >= expectedUploads) { + return true; + } + } + return false; } }); - await sleep(500, 1000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 通过 UI 交互选择模型 const modelConfig = manifest.models.find(m => m.id === modelId); @@ -162,9 +168,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 点击 "Select a models" 按钮 const selectModelBtn = page.getByRole('button', { name: 'Select a model' }); await selectModelBtn.waitFor({ timeout: 5000 }); - await sleep(300, 500); await safeClick(page, selectModelBtn, { bias: 'button' }); - await sleep(500, 800); + await sleep(300, 500); // 在 "Search In Models" 文本框中输入模型名称 const searchInput = page.getByRole('textbox', { name: 'Search In Models' }); @@ -227,11 +232,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 7. 提交 logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'button[type="submit"]', - inputTarget: INPUT_SELECTOR, - meta - }); + await safeClick(page, 'button[type="submit"]', { bias: 'button' }); logger.info('适配器', '等待生成结果中...', meta); diff --git a/src/backend/adapter/zai_is_text.js b/src/backend/adapter/zai_is_text.js index a2a26ef..657b5ac 100644 --- a/src/backend/adapter/zai_is_text.js +++ b/src/backend/adapter/zai_is_text.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, normalizePageError, normalizeHttpError, waitApiResponse, @@ -48,14 +47,12 @@ async function handleDiscordAuth(page) { try { // 等待页面加载完成,点击唯一的 button 标签 await page.waitForSelector('button', { timeout: 30000 }); - await sleep(1000, 1500); await safeClick(page, 'button', { bias: 'button' }); logger.info('适配器', '[登录器(zai)] 已点击登录按钮,等待跳转到 Discord...'); // 2. 等待跳转到 Discord OAuth2 授权页面 await page.waitForURL(url => url.href.includes('discord.com/oauth2/authorize'), { timeout: 60000 }); logger.info('适配器', '[登录器(zai)] 已到达 Discord 授权页面'); - await sleep(2000, 3000); // 3. 使用鼠标滚轮滚动 main 元素,直到授权按钮可用 // 授权按钮选择器: data-align="stretch" 的 div 中的最后一个按钮 (授权按钮在右边) @@ -67,7 +64,7 @@ async function handleDiscordAuth(page) { const isDisabled = await authorizeBtn.evaluate(el => el.disabled).catch(() => true); if (!isDisabled) { logger.info('适配器', '[登录器(zai)] 授权按钮已可用,正在点击...'); - await sleep(500, 1000); + await sleep(300, 500); await safeClick(page, authorizeBtn, { bias: 'button' }); break; } @@ -82,7 +79,6 @@ async function handleDiscordAuth(page) { await page.mouse.wheel(0, 200); } } - await sleep(800, 1200); } // 4. 等待跳转回 zai.is (不包含 auth 和 discord) @@ -95,7 +91,7 @@ async function handleDiscordAuth(page) { }, { timeout: 60000 }); logger.info('适配器', '[登录器(zai)] Discord 登录完成'); - await sleep(2000, 3000); + await sleep(500, 1000); unlockPageAuth(page); return true; } catch (err) { @@ -152,13 +148,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 logger.debug('适配器', '正在寻找输入框...', meta); await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1500, 2500); // 2. 上传图片 (如果有多张图片,会一张一张上传,每次都是 v1/files POST 请求) if (imgPaths && imgPaths.length > 0) { const expectedUploads = imgPaths.length; let uploadedCount = 0; + logger.info('适配器', `开始上传 ${expectedUploads} 张图片`, meta); await pasteImages(page, INPUT_SELECTOR, imgPaths, { uploadValidator: (response) => { const url = response.url(); @@ -173,13 +169,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { } }); - await sleep(1000, 2000); + logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 通过 UI 交互选择模型 const modelConfig = manifest.models.find(m => m.id === modelId); @@ -190,9 +186,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 点击 "Select a models" 按钮 const selectModelBtn = page.getByRole('button', { name: 'Select a model' }); await selectModelBtn.waitFor({ timeout: 5000 }); - await sleep(300, 500); await safeClick(page, selectModelBtn, { bias: 'button' }); - await sleep(500, 800); + await sleep(300, 500); // 在 "Search In Models" 文本框中输入模型名称 const searchInput = page.getByRole('textbox', { name: 'Search In Models' }); @@ -202,17 +197,12 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 按回车确认选择 await searchInput.press('Enter'); - await sleep(500, 1000); logger.info('适配器', `已选择模型: ${targetModel}`, meta); - // 5. 提交 - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: 'button[type="submit"]', - inputTarget: INPUT_SELECTOR, - meta - }); + // 5. 发送提示词 + logger.debug('适配器', '发送提示词...', meta); + await safeClick(page, 'button[type="submit"]', { bias: 'button' }); logger.info('适配器', '等待生成结果中...', meta); diff --git a/src/backend/adapter/zenmux_ai_text.js b/src/backend/adapter/zenmux_ai_text.js index dfa0a34..858d499 100644 --- a/src/backend/adapter/zenmux_ai_text.js +++ b/src/backend/adapter/zenmux_ai_text.js @@ -4,12 +4,11 @@ import { sleep, + humanType, safeClick, pasteImages } from '../engine/utils.js'; import { - fillPrompt, - submit, normalizePageError, normalizeHttpError, waitApiResponse, @@ -52,7 +51,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await newChatBtn.waitFor({ state: 'visible', timeout: 5000 }); await safeClick(page, newChatBtn, { bias: 'button' }); logger.debug('适配器', '已点击 New Chat 按钮', meta); - await sleep(500, 1000); } catch (e) { logger.debug('适配器', `New Chat 按钮未找到或已在新会话中: ${e.message}`, meta); } @@ -60,14 +58,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { // 1. 等待输入框加载 logger.debug('适配器', '正在寻找输入框...', meta); await waitForInput(page, INPUT_SELECTOR, { click: false }); - await sleep(1000, 1500); // 2. 上传图片 (如果有) if (imgPaths && imgPaths.length > 0) { const expectedUploads = imgPaths.length; let uploadedCount = 0; - logger.info('适配器', `准备上传 ${expectedUploads} 张图片`, meta); + logger.info('适配器', `开始上传 ${expectedUploads} 张图片`, meta); await pasteImages(page, INPUT_SELECTOR, imgPaths, { uploadValidator: (response) => { @@ -87,15 +84,13 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { return false; } }); - - await sleep(1000, 2000); logger.info('适配器', '图片上传完成', meta); } - // 3. 填写提示词 + // 3. 输入提示词 + logger.info('适配器', '输入提示词...', meta); await safeClick(page, INPUT_SELECTOR, { bias: 'input' }); - await fillPrompt(page, INPUT_SELECTOR, prompt, meta); - await sleep(500, 1000); + await humanType(page, INPUT_SELECTOR, prompt); // 4. 设置请求拦截器(修改模型ID和providers) logger.debug('适配器', '已启用请求拦截', meta); @@ -138,25 +133,25 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { await route.continue(); }); - // 5. 提交 - logger.debug('适配器', '点击发送...', meta); - await submit(page, { - btnSelector: SEND_BUTTON_SELECTOR, - inputTarget: INPUT_SELECTOR, + // 5. 先启动 API 监听 + logger.debug('适配器', '启动 API 监听...', meta); + const apiResponsePromise = waitApiResponse(page, { + urlMatch: 'v1/chat/completions', + method: 'POST', + timeout: 120000, meta }); + // 6. 发送提示词 + logger.info('适配器', '发送提示词...', meta); + await safeClick(page, SEND_BUTTON_SELECTOR, { bias: 'button' }); + logger.info('适配器', '等待生成结果中...', meta); - // 5. 等待 API 响应 + // 7. 等待 API 响应 let apiResponse; try { - apiResponse = await waitApiResponse(page, { - urlMatch: 'v1/chat/completions', - method: 'POST', - timeout: 120000, - meta - }); + apiResponse = await apiResponsePromise; } catch (e) { const pageError = normalizePageError(e, meta); if (pageError) return pageError; @@ -241,7 +236,7 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) { * 适配器 manifest */ export const manifest = { - id: 'zenmux_ai', + id: 'zenmux_ai_text', displayName: 'Zenmux AI (文本生成)', description: '使用 Zenmux AI 平台生成文本,支持多种大语言模型。需要已登录的 ZenMux 账户。', diff --git a/src/backend/engine/utils.js b/src/backend/engine/utils.js index 0cefaac..668b436 100644 --- a/src/backend/engine/utils.js +++ b/src/backend/engine/utils.js @@ -152,15 +152,18 @@ export function getHumanClickPoint(box, type = 'random') { } /** - * 安全点击元素 (包含拟人化移动和点击) + * 安全点击元素 (包含滚动、拟人化移动和点击) * 支持 CSS selector、ElementHandle 和 Locator 三种输入 * @param {import('playwright-core').Page} page - Playwright 页面对象 * @param {string|import('playwright-core').ElementHandle|import('playwright-core').Locator} target - CSS 选择器、元素句柄或 Locator * @param {object} [options] - 点击选项 * @param {string} [options.bias='random'] - 偏移偏好: 'input' 或 'random' + * @param {number} [options.clickCount=1] - 点击次数: 1=单击, 2=双击 * @returns {Promise} */ export async function safeClick(page, target, options = {}) { + const clickCount = options.clickCount || 1; + try { let el; @@ -179,13 +182,16 @@ export async function safeClick(page, target, options = {}) { if (!el || !el.asElement()) throw new Error(`Element handle invalid`); } + // 确保元素在可视区域内 + await el.scrollIntoViewIfNeeded().catch(() => { }); + // 使用 ghost-cursor 点击 if (page.cursor) { const box = await el.boundingBox(); if (box) { const { x, y } = getHumanClickPoint(box, options.bias || 'random'); await page.cursor.moveTo({ x, y }); - await page.mouse.click(x, y); + await page.mouse.click(x, y, { clickCount }); return; } // 如果无法获取 box,降级到默认点击 @@ -194,7 +200,7 @@ export async function safeClick(page, target, options = {}) { } // 降级逻辑 - await el.click(); + await el.click({ clickCount }); } catch (err) { throw err; } @@ -406,7 +412,7 @@ export async function pasteImages(page, target, filePaths, options = {}) { // 1. 拟人化: 先点击一下目标区域 (让后台看起来像是用户聚焦了输入框) await safeClick(page, target, { bias: 'input' }); - await sleep(500, 1000); + await sleep(300, 500); try { logger.debug('浏览器', '正在深度扫描文件上传控件...'); @@ -480,7 +486,7 @@ export async function pasteImages(page, target, filePaths, options = {}) { } else { // 默认行为: 等待上传预览出现 logger.info('浏览器', `已提交图片, 等待预览生成...`); - await sleep(2000, 4000); + await sleep(500, 1000); } } catch (e) { @@ -497,12 +503,14 @@ export async function pasteImages(page, target, filePaths, options = {}) { * @param {Object} [options] - 可选配置 * @param {Function} [options.uploadValidator] - 自定义上传确认回调函数, 接收 response 参数,返回 true 表示该响应代表一次成功上传 * @param {number} [options.timeout=60000] - 上传超时时间 (毫秒) + * @param {string} [options.clickAction='click'] - 点击动作: 'click' 或 'dblclick' * @returns {Promise} */ export async function uploadFilesViaChooser(page, triggerTarget, filePaths, options = {}) { if (!filePaths || filePaths.length === 0) return; const timeout = options.timeout || 60000; + const clickAction = options.clickAction || 'click'; const expectedUploads = filePaths.length; let uploadedCount = 0; @@ -544,8 +552,9 @@ export async function uploadFilesViaChooser(page, triggerTarget, filePaths, opti // 设置等待 filechooser 事件(在点击之前) const fileChooserPromise = page.waitForEvent('filechooser'); - // 点击触发按钮 - await safeClick(page, triggerTarget, { bias: 'button' }); + // 点击触发按钮(支持单击或双击) + const clickCount = clickAction === 'dblclick' ? 2 : 1; + await safeClick(page, triggerTarget, { bias: 'button', clickCount }); // 等待 filechooser 事件并设置文件 const fileChooser = await fileChooserPromise; diff --git a/src/backend/utils/index.js b/src/backend/utils/index.js index c3b1188..2c555b4 100644 --- a/src/backend/utils/index.js +++ b/src/backend/utils/index.js @@ -6,8 +6,6 @@ * - 页面交互 (page.js): * - waitForPageAuth/lockPageAuth/unlockPageAuth: 页面认证锁机制 * - waitForInput: 等待输入框出现(自动等待认证完成) - * - fillPrompt: 拟人化输入提示词 - * - submit: 提交表单(点击按钮失败则回退为回车) * - gotoWithCheck: 导航到 URL 并检测 HTTP 错误 * - moveMouseAway: 任务完成后移开鼠标 * - waitApiResponse: 等待 API 响应(带页面关闭监听) @@ -29,8 +27,6 @@ export { unlockPageAuth, isPageAuthLocked, waitForInput, - fillPrompt, - submit, gotoWithCheck, tryGotoWithCheck, moveMouseAway, diff --git a/src/backend/utils/page.js b/src/backend/utils/page.js index 5ff344e..7072b13 100644 --- a/src/backend/utils/page.js +++ b/src/backend/utils/page.js @@ -3,8 +3,7 @@ * @description 页面认证锁、输入框等待、表单提交等页面级操作 */ -import { sleep, humanType, safeClick, isPageValid, createPageCloseWatcher, getRealViewport, clamp, random } from '../engine/utils.js'; -import { logger } from '../../utils/logger.js'; +import { sleep, safeClick, isPageValid, createPageCloseWatcher, getRealViewport, clamp, random } from '../engine/utils.js'; // ========================================== // 页面认证锁 @@ -92,56 +91,6 @@ export async function waitForInput(page, selectorOrLocator, options = {}) { } } -/** - * 填写提示词 (通用) - * @param {import('playwright-core').Page} page - Playwright 页面对象 - * @param {string|import('playwright-core').ElementHandle} target - 输入目标 - * @param {string} prompt - 提示词内容 - * @param {object} [meta={}] - 日志元数据 - */ -export async function fillPrompt(page, target, prompt, meta = {}) { - logger.info('适配器', '正在输入提示词...', meta); - await humanType(page, target, prompt); - await sleep(800, 1500); -} - -/** - * 提交表单 (带回退逻辑) - * @param {import('playwright-core').Page} page - Playwright 页面对象 - * @param {object} options - 提交选项 - * @param {string} options.btnSelector - 按钮选择器 - * @param {string|import('playwright-core').ElementHandle} [options.inputTarget] - 输入框 - * @param {object} [options.meta={}] - 日志元数据 - * @returns {Promise} 是否成功点击按钮 - */ -export async function submit(page, options = {}) { - const { btnSelector, inputTarget, meta = {} } = options; - - try { - const btnHandle = await page.$(btnSelector); - if (btnHandle) { - await btnHandle.scrollIntoViewIfNeeded().catch(() => { }); - await sleep(200, 400); - await safeClick(page, btnHandle, { bias: 'button' }); - return true; - } - } catch (e) { - // 继续回退逻辑 - } - - // 回退:按回车提交 - logger.warn('适配器', '未找到发送按钮,尝试回车提交', meta); - if (inputTarget) { - if (typeof inputTarget === 'string') { - await page.focus(inputTarget).catch(() => { }); - } else { - await inputTarget.focus().catch(() => { }); - } - } - await page.keyboard.press('Enter'); - return false; -} - // ========================================== // 导航与鼠标 // ========================================== diff --git a/src/server/server.js b/src/server/server.js index 187decd..630c9c1 100644 --- a/src/server/server.js +++ b/src/server/server.js @@ -38,7 +38,7 @@ try { } catch (err) { logger.error('服务器', '配置加载失败', { error: err.message }); logger.error('服务器', '请先初始化配置:复制 config.example.yaml 为 config.yaml'); - process.exit(1); + process.exit(78); // 使用 78 退出码,supervisor 不会自动重启 } const {