feat: 初步支持 Gemini 网页版

This commit is contained in:
foxhui
2025-12-12 23:45:19 +08:00
Unverified
parent a0b0936c46
commit dffb209dac
8 changed files with 214 additions and 6 deletions
+6
View File
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.3.0] - 2025-12-12
### Added
- **支持新网站**
- 初步支持 Gemini 网页版
## [2.2.3] - 2025-12-12
### Added
+1
View File
@@ -10,6 +10,7 @@ LMArenaImagenAutomator 是一个基于 Playwright + Camoufox 的自动化图像
- [Gemini Enterprise Business](https://business.gemini.google/)
- [Nano Banana Free](https://nanobananafree.ai/)
- [zAI](https://zai.is/)
- [Google Gemini](https://gemini.google.com/)
- 未来可能支持更多网站。。。
### ✨ 主要特性
+1
View File
@@ -21,6 +21,7 @@ server:
backend:
# 适配器设置
# - lmarena (LMArena)
# - gemini (Gemini 网页版)
# - gemini_biz (Gemini Enterprise Business)
# - nanobananafree_ai (Nano Banana Free)
# - zai_is (zAI)
+185
View File
@@ -0,0 +1,185 @@
import { initBrowserBase } from '../../browser/launcher.js';
import {
sleep,
safeClick,
pasteImages
} from '../../browser/utils.js';
import {
fillPrompt,
normalizePageError,
moveMouseAway
} from '../utils.js';
import { logger } from '../../utils/logger.js';
// --- Configuration constants ---
// URL the adapter opens for the Gemini web app; also re-opened at the start of every task.
// NOTE(review): `hl=en` presumably forces the English UI so the English accessible names
// used for role lookups ('Send message', 'Tools', 'Create images') match — confirm.
const TARGET_URL = 'https://gemini.google.com/app?hl=en';
/**
 * Start a browser session pointed at the Gemini web app.
 *
 * Delegates the actual launch to `initBrowserBase`, handing it a readiness
 * check that waits for the prompt textbox and clicks into it.
 *
 * @param {object} config - Global configuration object (reads `config.paths.userDataDir`)
 * @returns {Promise<{browser: object, page: object, config: object}>}
 *   Whatever `initBrowserBase` resolves with, plus the `config` that was passed in.
 */
async function initBrowser(config) {
    // Readiness check: the page counts as usable once the prompt textbox
    // has appeared and has been clicked.
    async function waitInputValidator(page) {
        await page.getByRole('textbox').waitFor({ timeout: 60000 });
        await safeClick(page, page.getByRole('textbox'), { bias: 'input' });
        await sleep(500, 1000);
    }

    const launchOptions = {
        userDataDir: config.paths.userDataDir,
        targetUrl: TARGET_URL,
        productName: 'Gemini',
        waitInputValidator
    };

    const session = await initBrowserBase(config, launchOptions);

    // `config` is spread last so it always wins over any same-named key in the session.
    return { ...session, config };
}
/**
 * Start watching the page's network responses for the generated image.
 *
 * The watcher first waits for a successful `StreamGenerate` POST, and only
 * then accepts the first successful GET of a full-size generated image,
 * which it converts into a data URL.
 *
 * @param {object} page - Playwright page whose `response` events are observed
 * @param {object} meta - Log metadata forwarded to the logger
 * @param {number} [timeoutMs=120000] - How long to wait before rejecting
 * @returns {{promise: Promise<string>, cancel: Function}}
 *   `promise` resolves with the image data URL or rejects on timeout;
 *   `cancel` removes the listener and timer and is safe to call repeatedly.
 */
function watchForGeneratedImage(page, meta, timeoutMs = 120000) {
    let timer = null;
    let onResponse = null;
    let finished = false;

    const cancel = () => {
        finished = true;
        clearTimeout(timer);
        if (onResponse) page.off('response', onResponse);
    };

    const promise = new Promise((resolve, reject) => {
        let streamGenerateSuccess = false;

        onResponse = async (response) => {
            // The whole handler is guarded: it runs as an event listener, so an
            // escaping error would surface as an unhandled promise rejection.
            try {
                const url = response.url();

                // Phase 1: the generation request itself must succeed first.
                if (!streamGenerateSuccess &&
                    url.includes('assistant.lamda.BardFrontendService/StreamGenerate') &&
                    response.request().method() === 'POST' &&
                    response.status() === 200) {
                    streamGenerateSuccess = true;
                    logger.info('适配器', '生成请求成功,等待图片...', meta);
                }

                // Phase 2: only after that, accept the image download response.
                const isGeneratedImage = streamGenerateSuccess &&
                    url.includes('googleusercontent.com/rd-gg-dl') &&
                    url.includes('=s1024-rj') &&
                    response.request().method() === 'GET' &&
                    response.status() === 200;
                if (!isGeneratedImage) return;

                // Read the raw image bytes straight from the response.
                const buffer = await response.body();
                // Another response may have won the race while the body was loading.
                if (finished) return;

                // Use the served Content-Type for the data URL, defaulting to JPEG.
                const contentType = response.headers()['content-type'] || 'image/jpeg';
                const imageData = `data:${contentType};base64,${buffer.toString('base64')}`;
                logger.info('适配器', '已捕获图片数据', meta);
                cancel();
                resolve(imageData);
            } catch (e) {
                // Keep listening: a later response may still deliver the image.
                logger.warn('适配器', `捕获图片失败: ${e.message}`, meta);
            }
        };

        timer = setTimeout(() => {
            cancel();
            reject(new Error(`等待图片响应超时 (${Math.round(timeoutMs / 1000)}秒)`));
        }, timeoutMs);

        page.on('response', onResponse);
    });

    return { promise, cancel };
}

/**
 * Run one image-generation task on the Gemini web app.
 *
 * Opens a fresh conversation, optionally pastes reference images, types the
 * prompt, switches on the "Create images" tool, sends the message and waits
 * for the generated image to come back over the network.
 *
 * This function does not throw: every failure is reported through the
 * returned `error` field.
 *
 * @param {object} context - Browser context { page, config }
 * @param {string} prompt - Prompt text
 * @param {string[]} imgPaths - Paths of reference images to upload (may be empty)
 * @param {string} [modelId] - Model ID (unused by this adapter)
 * @param {object} [meta={}] - Log metadata
 * @returns {Promise<{image?: string, error?: string}>} `image` is a base64 data URL
 */
async function generateImage(context, prompt, imgPaths, modelId, meta = {}) {
    const { page } = context;
    const inputLocator = page.getByRole('textbox');
    const sendBtnLocator = page.getByRole('button', { name: 'Send message' });
    // Kept outside the try block so `finally` can always tear the watcher down.
    let imageWatcher = null;

    try {
        logger.info('适配器', '开启新会话...', meta);
        await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded' });

        // 1. Wait for the prompt input to load
        await inputLocator.waitFor({ timeout: 30000 });
        await sleep(1500, 2500);

        // 2. Upload reference images
        if (imgPaths && imgPaths.length > 0) {
            const expectedUploads = imgPaths.length;
            let uploadedCount = 0;
            await pasteImages(page, inputLocator, imgPaths, {
                uploadValidator: (response) => {
                    const url = response.url();
                    // A finished upload shows up as a 200 response from
                    // google.com/upload/?upload_id=...
                    if (response.status() === 200 &&
                        url.includes('google.com/upload/') &&
                        url.includes('upload_id=')) {
                        uploadedCount++;
                        logger.info('适配器', `图片上传进度: ${uploadedCount}/${expectedUploads}`, meta);
                        return uploadedCount >= expectedUploads;
                    }
                    return false;
                }
            });
            await sleep(1000, 2000);
        }

        // 3. Fill in the prompt
        await safeClick(page, inputLocator, { bias: 'input' });
        await fillPrompt(page, inputLocator, prompt, meta);
        await sleep(500, 1000);

        // 4. Open the Tools menu to enable image generation
        logger.debug('适配器', '点击 Tools 按钮...', meta);
        const toolsBtn = page.getByRole('button', { name: 'Tools' });
        await safeClick(page, toolsBtn, { bias: 'button' });
        await sleep(500, 1000);

        // 5. Pick "Create images"
        logger.debug('适配器', '点击 Create images 按钮...', meta);
        const createImagesBtn = page.getByRole('button', { name: 'Create images' });
        await safeClick(page, createImagesBtn, { bias: 'button' });
        await sleep(500, 1000);

        // 6. Install the response watcher BEFORE sending so no response is missed
        imageWatcher = watchForGeneratedImage(page, meta);

        // 7. Send the message
        logger.debug('适配器', '点击发送...', meta);
        await safeClick(page, sendBtnLocator, { bias: 'button' });
        logger.info('适配器', '等待生成结果...', meta);

        // 8. Wait for the image response
        const image = await imageWatcher.promise;
        if (image) {
            logger.info('适配器', '已获取图片,任务完成', meta);
            return { image };
        } else {
            return { error: '未能获取图片' };
        }
    } catch (err) {
        // Top-level error handling
        const pageError = normalizePageError(err, meta);
        if (pageError) return pageError;
        logger.error('适配器', '生成任务失败', { ...meta, error: err.message });
        return { error: `生成任务失败: ${err.message}` };
    } finally {
        // Always drop the listener and timer. Without this, a failure between
        // installing the watcher and awaiting it would leak the listener onto
        // the reused page and later raise an unhandled timeout rejection.
        if (imageWatcher) imageWatcher.cancel();

        // Task finished: park the mouse in a safe area. This is cosmetic, so a
        // failure here must not replace the result computed above.
        try {
            await moveMouseAway(page);
        } catch (e) {
            logger.debug('适配器', `移开鼠标失败: ${e.message}`, meta);
        }
    }
}
export { initBrowser, generateImage };
+2
View File
@@ -8,6 +8,7 @@ import { logger } from '../utils/logger.js';
// 导入适配器
import * as lmarenaBackend from './adapter/lmarena.js';
import * as geminiBackend from './adapter/gemini_biz.js';
import * as geminiConsumerBackend from './adapter/gemini.js';
import * as nanobananafreeBackend from './adapter/nanobananafree_ai.js';
import * as zaiIsBackend from './adapter/zai_is.js';
@@ -34,6 +35,7 @@ config.paths = {
// 适配器映射表
const ADAPTER_MAP = {
'gemini_biz': geminiBackend,
'gemini': geminiConsumerBackend,
'nanobananafree_ai': nanobananafreeBackend,
'zai_is': zaiIsBackend,
'lmarena': lmarenaBackend
+9 -3
View File
@@ -165,6 +165,13 @@ export const ZAI_IS_MODELS = {
}
};
// Model configuration table for the Gemini backend, keyed by model ID.
// NOTE(review): IMAGE_POLICY.OPTIONAL presumably means reference images may be
// supplied but are not required — confirm against the IMAGE_POLICY definition.
export const GEMINI_MODELS = {
"gemini-3-pro-image-preview": {
imagePolicy: IMAGE_POLICY.OPTIONAL
}
};
/**
* 获取后端对应的模型配置表
* @param {string} backendName - 后端名称 ('lmarena'、'gemini_biz'、'gemini'、'nanobananafree_ai' 或 'zai_is')
@@ -177,13 +184,12 @@ function getModelsConfigForBackend(backendName) {
return LMARENA_MODELS;
case 'gemini_biz':
return GEMINI_BIZ_MODELS;
case 'gemini':
return GEMINI_MODELS;
case 'nanobananafree_ai':
return NANOBANANAFREE_AI_MODELS;
case 'zai_is':
return ZAI_IS_MODELS;
// 将来新增其它后端:
// case 'foo_site':
// return FOO_SITE_MODELS;
default:
return {};
}
+9 -3
View File
@@ -131,9 +131,9 @@ export function getHumanClickPoint(box, type = 'random') {
/**
* 安全点击元素 (包含拟人化移动和点击)
* 支持 CSS selector 和 ElementHandle 两种输入
* 支持 CSS selector、ElementHandle 和 Locator 三种输入
* @param {import('playwright-core').Page} page - Playwright 页面对象
* @param {string|import('playwright-core').ElementHandle} target - CSS 选择器或元素句柄
* @param {string|import('playwright-core').ElementHandle|import('playwright-core').Locator} target - CSS 选择器、元素句柄或 Locator
* @param {object} [options] - 点击选项
* @param {string} [options.bias='random'] - 偏移偏好: 'input' 或 'random'
* @returns {Promise<void>}
@@ -142,11 +142,17 @@ export async function safeClick(page, target, options = {}) {
try {
let el;
// 判断是 selector 还是 ElementHandle
// 判断输入类型
if (typeof target === 'string') {
// CSS selector
el = await page.$(target);
if (!el) throw new Error(`未找到: ${target}`);
} else if (typeof target.elementHandle === 'function') {
// Locator (来自 page.getByRole, page.getByText 等)
el = await target.elementHandle();
if (!el) throw new Error(`Locator 未匹配到元素`);
} else {
// ElementHandle
el = target;
if (!el || !el.asElement()) throw new Error(`Element handle invalid`);
}
+1
View File
@@ -47,6 +47,7 @@ server:
backend:
# 适配器设置
# - lmarena (LMArena)
# - gemini (Gemini 网页版)
# - gemini_biz (Gemini Enterprise Business)
# - nanobananafree_ai (Nano Banana Free)
# - zai_is (zAI)