From dffb209dac087e986efe8ddaa1611b575306ccba Mon Sep 17 00:00:00 2001 From: foxhui Date: Fri, 12 Dec 2025 23:45:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=9D=E6=AD=A5=E6=94=AF=E6=8C=81=20?= =?UTF-8?q?Gemini=20=E7=BD=91=E9=A1=B5=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 6 ++ README.md | 1 + config.example.yaml | 1 + lib/backend/adapter/gemini.js | 185 ++++++++++++++++++++++++++++++++++ lib/backend/index.js | 2 + lib/backend/models.js | 12 ++- lib/browser/utils.js | 12 ++- lib/utils/config.js | 1 + 8 files changed, 214 insertions(+), 6 deletions(-) create mode 100644 lib/backend/adapter/gemini.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aac042..05de3d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [2.3.0] - 2025-12-12 + +### Added +- **支持新网站** + - 初步支持 Gemini 网页版 + ## [2.2.3] - 2025-12-12 ### Added diff --git a/README.md b/README.md index 8bd988d..1f5f997 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ LMArenaImagenAutomator 是一个基于 Playwright + Camoufox 的自动化图像 - [Gemini Enterprise Business](https://business.gemini.google/) - [Nano Banana Free](https://nanobananafree.ai/) - [zAI](https://zai.is/) + - [Google Gemini](https://gemini.google.com/) - 未来可能支持更多网站。。。 ### ✨ 主要特性 diff --git a/config.example.yaml b/config.example.yaml index 40b83cb..de76b38 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -21,6 +21,7 @@ server: backend: # 适配器设置 # - lmarena (LMArena) + # - gemini (Gemini 网页版) # - gemini_biz (Gemini Enterprise Business) # - nanobananafree_ai (Nano Banana Free) # - zai_is (zAI) diff --git a/lib/backend/adapter/gemini.js b/lib/backend/adapter/gemini.js new file mode 100644 index 0000000..8a1c978 --- /dev/null +++ b/lib/backend/adapter/gemini.js @@ -0,0 +1,185 @@ +import { initBrowserBase } from '../../browser/launcher.js'; +import { + sleep, + safeClick, + pasteImages +} from '../../browser/utils.js'; +import { + fillPrompt, + normalizePageError, + moveMouseAway +} from '../utils.js'; +import { logger } from '../../utils/logger.js'; + +// --- 配置常量 --- +const TARGET_URL = 'https://gemini.google.com/app?hl=en'; + +/** + * 初始化浏览器会话 + * @param {object} config - 全局配置对象 + * @returns {Promise<{browser: object, page: object, config: object}>} + */ +async function initBrowser(config) { + // 输入框验证逻辑 + const waitInputValidator = async (page) => { + await page.getByRole('textbox').waitFor({ timeout: 60000 }); + await safeClick(page, page.getByRole('textbox'), { bias: 'input' }); + await sleep(500, 1000); + }; + + const base = await initBrowserBase(config, { + userDataDir: config.paths.userDataDir, + targetUrl: TARGET_URL, + productName: 'Gemini', + waitInputValidator + }); + return { ...base, config }; +} + +/** + * 执行生图任务 + * @param 
{object} context - 浏览器上下文 { page, config } + * @param {string} prompt - 提示词 + * @param {string[]} imgPaths - 图片路径数组 + * @param {string} [modelId] - 模型 ID (此适配器未使用) + * @param {object} [meta={}] - 日志元数据 + * @returns {Promise<{image?: string, error?: string}>} + */ +async function generateImage(context, prompt, imgPaths, modelId, meta = {}) { + const { page } = context; + const inputLocator = page.getByRole('textbox'); + const sendBtnLocator = page.getByRole('button', { name: 'Send message' }); + + try { + logger.info('适配器', '开启新会话...', meta); + await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded' }); + + // 1. 等待输入框加载 + await inputLocator.waitFor({ timeout: 30000 }); + await sleep(1500, 2500); + + // 2. 上传图片 + if (imgPaths && imgPaths.length > 0) { + const expectedUploads = imgPaths.length; + let uploadedCount = 0; + + await pasteImages(page, inputLocator, imgPaths, { + uploadValidator: (response) => { + const url = response.url(); + // 检测上传成功:google.com/upload/?upload_id= 的 POST 请求 + if (response.status() === 200 && + url.includes('google.com/upload/') && + url.includes('upload_id=')) { + uploadedCount++; + logger.info('适配器', `图片上传进度: ${uploadedCount}/${expectedUploads}`, meta); + return uploadedCount >= expectedUploads; + } + return false; + } + }); + + await sleep(1000, 2000); + } + + // 3. 填写提示词 + await safeClick(page, inputLocator, { bias: 'input' }); + await fillPrompt(page, inputLocator, prompt, meta); + await sleep(500, 1000); + + // 4. 点击 Tools 按钮启用图片生成 + logger.debug('适配器', '点击 Tools 按钮...', meta); + const toolsBtn = page.getByRole('button', { name: 'Tools' }); + await safeClick(page, toolsBtn, { bias: 'button' }); + await sleep(500, 1000); + + // 5. 点击 Create images 按钮 + logger.debug('适配器', '点击 Create images 按钮...', meta); + const createImagesBtn = page.getByRole('button', { name: 'Create images' }); + await safeClick(page, createImagesBtn, { bias: 'button' }); + await sleep(500, 1000); + + // 6. 
设置响应监听 - 等待 StreamGenerate 成功后捕获图片 + let imageData = null; + + const imagePromise = new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + cleanup(); + reject(new Error('等待图片响应超时 (120秒)')); + }, 120000); + + let streamGenerateSuccess = false; + + const onResponse = async (response) => { + const url = response.url(); + + // 先等待 StreamGenerate 成功 + if (!streamGenerateSuccess && + url.includes('assistant.lamda.BardFrontendService/StreamGenerate') && + response.request().method() === 'POST' && + response.status() === 200) { + streamGenerateSuccess = true; + logger.info('适配器', '生成请求成功,等待图片...', meta); + } + + // StreamGenerate 成功后,捕获图片响应 + if (streamGenerateSuccess && + url.includes('googleusercontent.com/rd-gg-dl') && + url.includes('=s1024-rj') && + response.request().method() === 'GET' && + response.status() === 200) { + try { + // 直接获取图片二进制数据 + const buffer = await response.body(); + const base64 = buffer.toString('base64'); + + // 根据 Content-Type 确定图片格式 + const contentType = response.headers()['content-type'] || 'image/jpeg'; + imageData = `data:${contentType};base64,${base64}`; + + logger.info('适配器', '已捕获图片数据', meta); + cleanup(); + resolve(imageData); + } catch (e) { + logger.warn('适配器', `捕获图片失败: ${e.message}`, meta); + } + } + }; + + const cleanup = () => { + clearTimeout(timeout); + page.off('response', onResponse); + }; + + page.on('response', onResponse); + }); + + // 7. 点击发送 + logger.debug('适配器', '点击发送...', meta); + await safeClick(page, sendBtnLocator, { bias: 'button' }); + + logger.info('适配器', '等待生成结果...', meta); + + // 7. 
等待图片响应 + const image = await imagePromise; + + if (image) { + logger.info('适配器', '已获取图片,任务完成', meta); + return { image }; + } else { + return { error: '未能获取图片' }; + } + + } catch (err) { + // 顶层错误处理 + const pageError = normalizePageError(err, meta); + if (pageError) return pageError; + + logger.error('适配器', '生成任务失败', { ...meta, error: err.message }); + return { error: `生成任务失败: ${err.message}` }; + } finally { + // 任务结束,将鼠标移至安全区域 + await moveMouseAway(page); + } +} + +export { initBrowser, generateImage }; diff --git a/lib/backend/index.js b/lib/backend/index.js index dcf3969..cc7c6a5 100644 --- a/lib/backend/index.js +++ b/lib/backend/index.js @@ -8,6 +8,7 @@ import { logger } from '../utils/logger.js'; // 导入适配器 import * as lmarenaBackend from './adapter/lmarena.js'; import * as geminiBackend from './adapter/gemini_biz.js'; +import * as geminiConsumerBackend from './adapter/gemini.js'; import * as nanobananafreeBackend from './adapter/nanobananafree_ai.js'; import * as zaiIsBackend from './adapter/zai_is.js'; @@ -34,6 +35,7 @@ config.paths = { // 适配器映射表 const ADAPTER_MAP = { 'gemini_biz': geminiBackend, + 'gemini': geminiConsumerBackend, 'nanobananafree_ai': nanobananafreeBackend, 'zai_is': zaiIsBackend, 'lmarena': lmarenaBackend diff --git a/lib/backend/models.js b/lib/backend/models.js index e274a12..be5e6e5 100644 --- a/lib/backend/models.js +++ b/lib/backend/models.js @@ -165,6 +165,13 @@ export const ZAI_IS_MODELS = { } }; +// Gemini 后端模型配置 +export const GEMINI_MODELS = { + "gemini-3-pro-image-preview": { + imagePolicy: IMAGE_POLICY.OPTIONAL + } +}; + /** * 获取后端对应的模型配置表 * @param {string} backendName - 后端名称 ('lmarena' 或 'gemini_biz' 或 'nanobananafree_ai') @@ -177,13 +184,12 @@ function getModelsConfigForBackend(backendName) { return LMARENA_MODELS; case 'gemini_biz': return GEMINI_BIZ_MODELS; + case 'gemini': + return GEMINI_MODELS; case 'nanobananafree_ai': return NANOBANANAFREE_AI_MODELS; case 'zai_is': return ZAI_IS_MODELS; - // 将来新增其它后端: - // case 
'foo_site': - // return FOO_SITE_MODELS; default: return {}; } diff --git a/lib/browser/utils.js b/lib/browser/utils.js index 3de04e1..84de815 100644 --- a/lib/browser/utils.js +++ b/lib/browser/utils.js @@ -131,9 +131,9 @@ export function getHumanClickPoint(box, type = 'random') { /** * 安全点击元素 (包含拟人化移动和点击) - * 支持 CSS selector 和 ElementHandle 两种输入 + * 支持 CSS selector、ElementHandle 和 Locator 三种输入 * @param {import('playwright-core').Page} page - Playwright 页面对象 - * @param {string|import('playwright-core').ElementHandle} target - CSS 选择器或元素句柄 + * @param {string|import('playwright-core').ElementHandle|import('playwright-core').Locator} target - CSS 选择器、元素句柄或 Locator * @param {object} [options] - 点击选项 * @param {string} [options.bias='random'] - 偏移偏好: 'input' 或 'random' * @returns {Promise} @@ -142,11 +142,17 @@ export async function safeClick(page, target, options = {}) { try { let el; - // 判断是 selector 还是 ElementHandle + // 判断输入类型 if (typeof target === 'string') { + // CSS selector el = await page.$(target); if (!el) throw new Error(`未找到: ${target}`); + } else if (typeof target.elementHandle === 'function') { + // Locator (来自 page.getByRole, page.getByText 等) + el = await target.elementHandle(); + if (!el) throw new Error(`Locator 未匹配到元素`); } else { + // ElementHandle el = target; if (!el || !el.asElement()) throw new Error(`Element handle invalid`); } diff --git a/lib/utils/config.js b/lib/utils/config.js index 9648a74..3511a46 100644 --- a/lib/utils/config.js +++ b/lib/utils/config.js @@ -47,6 +47,7 @@ server: backend: # 适配器设置 # - lmarena (LMArena) + # - gemini (Gemini 网页版) # - gemini_biz (Gemini Enterprise Business) # - nanobananafree_ai (Nano Banana Free) # - zai_is (zAI)