乐闻世界logo
搜索文章和话题

前端面试题手册

Puppeteer 如何实现页面交互和表单操作?有哪些常用的 API 和最佳实践?

Puppeteer 提供了丰富的页面交互和表单操作功能,可以模拟用户的真实操作行为,这对于自动化测试和网页爬虫非常重要。1. 基本页面操作导航到页面:// 基本导航await page.goto('https://example.com');// 等待网络空闲await page.goto('https://example.com', { waitUntil: 'networkidle2' });// 设置超时时间await page.goto('https://example.com', { timeout: 30000 });// 等待特定条件await page.goto('https://example.com', { waitUntil: ['load', 'domcontentloaded'] });刷新页面:await page.reload();await page.reload({ waitUntil: 'networkidle2' });前进和后退:await page.goBack();await page.goForward();2. 元素选择Puppeteer 支持多种选择器方式。使用 $ 选择单个元素:// 通过 CSS 选择器const element = await page.$('#my-id');const element = await page.$('.my-class');const element = await page.$('div > p');// 通过 XPathconst element = await page.$x('//div[@class="my-class"]');使用 $$ 选择多个元素:// 选择所有匹配的元素const elements = await page.$$('.item');console.log(elements.length); // 元素数量// 遍历元素for (const element of elements) { const text = await element.evaluate(el => el.textContent); console.log(text);}使用 $$eval 批量获取数据:// 获取所有元素的文本const texts = await page.$$eval('.item', elements => { return elements.map(el => el.textContent);});// 获取所有元素的属性const hrefs = await page.$$eval('a', elements => { return elements.map(el => el.href);});3. 点击操作基本点击:await page.click('#button');await page.click('.submit-btn');带选项的点击:await page.click('#button', { button: 'left', // 'left', 'right', 'middle' clickCount: 1, // 点击次数 delay: 100, // 点击延迟(毫秒) offset: { // 点击位置偏移 x: 10, y: 10 }});双击:await page.click('#button', { clickCount: 2 });右键点击:await page.click('#button', { button: 'right' });等待元素可点击:await page.waitForSelector('#button', { visible: true });await page.click('#button');4. 
文本输入基本输入:await page.type('#input', 'Hello World');带选项的输入:await page.type('#input', 'Hello World', { delay: 100 // 每个字符的延迟(毫秒)});注意:page.type 只支持 delay 选项,没有 clear 选项,输入前不会自动清空输入框,需要按下文清空输入框的做法手动清空。模拟真实打字速度:await page.type('#input', 'Hello World', { delay: 50 });清空输入框:await page.click('#input');await page.keyboard.down('Control');await page.keyboard.press('A');await page.keyboard.up('Control');await page.keyboard.press('Backspace');5. 键盘操作基本按键:await page.keyboard.press('Enter');await page.keyboard.press('Tab');await page.keyboard.press('Escape');await page.keyboard.press('Backspace');组合键:// Ctrl+Cawait page.keyboard.down('Control');await page.keyboard.press('C');await page.keyboard.up('Control');// Ctrl+A (全选)await page.keyboard.down('Control');await page.keyboard.press('A');await page.keyboard.up('Control');// Ctrl+V (粘贴)await page.keyboard.down('Control');await page.keyboard.press('V');await page.keyboard.up('Control');特殊键:await page.keyboard.press('ArrowUp');await page.keyboard.press('ArrowDown');await page.keyboard.press('ArrowLeft');await page.keyboard.press('ArrowRight');await page.keyboard.press('PageUp');await page.keyboard.press('PageDown');await page.keyboard.press('Home');await page.keyboard.press('End');6. 鼠标操作移动鼠标:await page.mouse.move(100, 100);await page.mouse.move(100, 100, { steps: 10 }); // 平滑移动点击鼠标:await page.mouse.click(100, 100);await page.mouse.click(100, 100, { button: 'left', clickCount: 1});按下和释放鼠标:await page.mouse.down();await page.mouse.up();// 拖拽操作(mouse.down 不接受坐标,需先把鼠标移动到起点)await page.mouse.move(100, 100);await page.mouse.down();await page.mouse.move(200, 200, { steps: 10 });await page.mouse.up();7. 
表单操作填写表单:// 文本输入await page.type('#name', 'John Doe');await page.type('#email', 'john@example.com');// 选择下拉框await page.select('#country', 'CN');await page.select('#country', 'CN', 'US'); // 多选// 复选框await page.click('#checkbox');const isChecked = await page.$eval('#checkbox', el => el.checked);// 单选框await page.click('#radio-male');// 文件上传const fileInput = await page.$('#file-upload');await fileInput.uploadFile('/path/to/file.pdf');await fileInput.uploadFile('/file1.pdf', '/file2.pdf');提交表单:// 点击提交按钮await page.click('#submit-button');// 使用表单提交await page.evaluate(() => { document.querySelector('form').submit();});8. 滚动操作滚动到页面底部:await page.evaluate(() => { window.scrollTo(0, document.body.scrollHeight);});滚动到特定元素:await page.evaluate(() => { document.querySelector('#target').scrollIntoView();});平滑滚动:await page.evaluate(() => { window.scrollTo({ top: 1000, behavior: 'smooth' });});滚动指定距离:await page.evaluate(() => { window.scrollBy(0, 500);});9. 等待元素等待元素出现:await page.waitForSelector('.result');await page.waitForSelector('.result', { visible: true });await page.waitForSelector('.result', { hidden: true });等待 XPath:await page.waitForXPath('//div[@class="result"]');等待函数:await page.waitForFunction(() => { return document.querySelectorAll('.item').length > 5;});等待导航:await Promise.all([ page.waitForNavigation(), page.click('#link')]);10. 获取元素信息获取文本内容:const text = await page.$eval('.title', el => el.textContent);获取属性:const href = await page.$eval('a', el => el.href);const id = await page.$eval('div', el => el.id);获取多个元素信息:const texts = await page.$$eval('.item', elements => { return elements.map(el => el.textContent);});检查元素是否存在:const exists = await page.$('.element') !== null;检查元素是否可见:const isVisible = await page.$eval('.element', el => { return el.offsetParent !== null;});11. 
实际应用场景场景 1:登录表单填写async function login(url, username, password) { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto(url); // 填写登录表单 await page.type('#username', username); await page.type('#password', password); // 点击登录按钮 await Promise.all([ page.waitForNavigation(), page.click('#login-button') ]); // 验证登录成功 const isLoggedIn = await page.$('.user-profile') !== null; await browser.close(); return isLoggedIn;}login('https://example.com/login', 'user@example.com', 'password');场景 2:搜索功能测试async function testSearch(url, query) { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto(url); // 输入搜索关键词 await page.type('#search-input', query); // 提交搜索 await Promise.all([ page.waitForNavigation(), page.keyboard.press('Enter') ]); // 等待搜索结果 await page.waitForSelector('.search-result'); // 获取结果数量 const resultCount = await page.$$eval('.search-result', results => { return results.length; }); console.log(`Found ${resultCount} results for "${query}"`); await browser.close(); return resultCount;}testSearch('https://example.com', 'puppeteer');场景 3:分页数据抓取async function scrapePaginatedData(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const allData = []; let hasNextPage = true; let pageNum = 1; while (hasNextPage) { await page.goto(`${url}?page=${pageNum}`); await page.waitForSelector('.item'); // 抓取当前页数据 const pageData = await page.$$eval('.item', items => { return items.map(item => ({ title: item.querySelector('.title').textContent, price: item.querySelector('.price').textContent })); }); allData.push(...pageData); console.log(`Scraped page ${pageNum}: ${pageData.length} items`); // 检查是否有下一页 hasNextPage = await page.$('.next-page:not(.disabled)') !== null; pageNum++; } await browser.close(); return allData;}scrapePaginatedData('https://example.com/products');场景 4:动态内容加载async function scrapeDynamicContent(url) { const browser = await puppeteer.launch(); const 
page = await browser.newPage(); await page.goto(url); // 等待初始内容加载 await page.waitForSelector('.content'); // 滚动加载更多内容 while (true) { // 滚动到底部 await page.evaluate(() => { window.scrollTo(0, document.body.scrollHeight); }); // 等待新内容加载 try { await page.waitForSelector('.new-content', { timeout: 3000 }); } catch (error) { break; // 没有新内容了 } } // 获取所有内容 const allContent = await page.$$eval('.content-item', items => { return items.map(item => item.textContent); }); await browser.close(); return allContent;}scrapeDynamicContent('https://example.com/infinite-scroll');12. 最佳实践1. 使用等待机制:// 好的做法await page.waitForSelector('.button', { visible: true });await page.click('.button');// 不好的做法await page.click('.button'); // 可能失败2. 处理动态内容:// 等待网络空闲await page.goto(url, { waitUntil: 'networkidle2' });// 等待特定元素await page.waitForSelector('.loaded-content');3. 错误处理:try { await page.click('#button');} catch (error) { console.error('Click failed:', error); // 重试逻辑}4. 性能优化:// 禁用不必要的资源await page.setRequestInterception(true);page.on('request', (request) => { if (['image', 'font', 'media'].includes(request.resourceType())) { request.abort(); } else { request.continue(); }});5. 清理资源:try { // 操作代码} finally { await browser.close();}
阅读 0·2月19日 19:48

Puppeteer 和 Selenium 有什么区别?在什么场景下应该选择 Puppeteer 而不是 Selenium?

Puppeteer 和 Selenium 都是流行的浏览器自动化工具,但它们在设计理念、实现方式和使用场景上有显著差异。1. 架构差异Puppeteer:基于 Chrome DevTools Protocol (CDP)直接与浏览器通信,无需中间层专为 Chrome/Chromium 设计使用 WebSocket 与浏览器建立连接Selenium:基于 WebDriver 协议通过 WebDriver 服务器与浏览器通信支持多种浏览器(Chrome、Firefox、Safari、Edge 等)需要安装浏览器驱动程序2. 性能对比Puppeteer:// 启动速度快const browser = await puppeteer.launch();const page = await browser.newPage();await page.goto('https://example.com'); // 快速加载Selenium:// 启动较慢const driver = await new Builder() .forBrowser('chrome') .build();await driver.get('https://example.com'); // 加载较慢性能指标对比:| 指标 | Puppeteer | Selenium ||------|-----------|----------|| 启动时间 | 快(1-2秒) | 慢(3-5秒) || 执行速度 | 快 | 中等 || 内存占用 | 较低 | 较高 || 网络请求 | 直接通信 | 通过驱动 |3. API 设计Puppeteer API:// 简洁直观的 APIawait page.click('#button');await page.type('#input', 'text');await page.waitForSelector('.result');const text = await page.$eval('.title', el => el.textContent);Selenium API:// 相对复杂的 APIawait driver.findElement(By.id('button')).click();await driver.findElement(By.id('input')).sendKeys('text');await driver.wait(until.elementLocated(By.css('.result')));const text = await driver.findElement(By.css('.title')).getText();4. 浏览器支持Puppeteer:Chrome/Chromium(主要支持)Firefox(早期通过 puppeteer-firefox 实验性支持,该包已废弃;新版本 Puppeteer 可通过 WebDriver BiDi 驱动 Firefox)其他浏览器支持有限Selenium:ChromeFirefoxSafariEdgeOperaInternet Explorer支持几乎所有主流浏览器5. 功能特性对比Puppeteer 特有功能:// 1. 网络拦截await page.setRequestInterception(true);page.on('request', request => { if (request.resourceType() === 'image') { request.abort(); } else { request.continue(); }});// 2. 性能追踪const client = await page.target().createCDPSession();await client.send('Performance.enable');const metrics = await client.send('Performance.getMetrics');// 3. 文件下载(通过 CDP 设置下载目录)const downloadClient = await page.target().createCDPSession();await downloadClient.send('Page.setDownloadBehavior', { behavior: 'allow', downloadPath: '/path/to/save'});await page.click('#download-button');// 4. 设备模拟const devices = puppeteer.devices;const iPhone = devices['iPhone 12'];await page.emulate(iPhone);// 5. 
地理位置模拟await page.setGeolocation({ latitude: 35.6895, longitude: 139.6917 });Selenium 特有功能:// 1. 多浏览器支持const driver = await new Builder() .forBrowser('firefox') .build();// 2. 分布式测试(Selenium Grid)// 可以在多台机器上并行运行测试// 3. 移动设备测试(Appium)// 支持原生移动应用测试// 4. 高级等待机制await driver.wait( until.titleIs('Expected Title'), 5000, 'Title did not match');// 5. Actions API(复杂交互)await driver.actions() .move({ origin: element }) .press() .move({ origin: targetElement }) .release() .perform();6. 使用场景Puppeteer 适用场景:网页爬虫和数据抓取生成截图和 PDF性能测试和监控CI/CD 自动化测试SPA(单页应用)测试需要网络拦截的场景Selenium 适用场景:跨浏览器兼容性测试大型企业级测试框架分布式测试环境需要支持多种浏览器的项目移动应用测试(配合 Appium)传统 Web 应用测试7. 学习曲线Puppeteer:API 简洁直观文档清晰易懂学习曲线较平缓适合初学者Selenium:API 相对复杂需要理解 WebDriver 概念学习曲线较陡峭需要更多配置8. 社区和生态系统Puppeteer:Google 官方维护活跃的 GitHub 社区丰富的插件生态持续更新和改进Selenium:开源社区维护成熟的生态系统大量第三方工具和集成广泛的企业应用9. 实际代码对比任务:登录并获取用户信息Puppeteer 实现:const puppeteer = require('puppeteer');(async () => { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto('https://example.com/login'); // 填写表单 await page.type('#username', 'user@example.com'); await page.type('#password', 'password123'); // 提交表单并等待导航 await Promise.all([ page.waitForNavigation(), page.click('#login-button') ]); // 获取用户信息 const userInfo = await page.evaluate(() => { return { name: document.querySelector('.user-name').textContent, email: document.querySelector('.user-email').textContent }; }); console.log(userInfo); await browser.close();})();Selenium 实现:const { Builder, By, until } = require('selenium-webdriver');(async () => { const driver = await new Builder() .forBrowser('chrome') .build(); await driver.get('https://example.com/login'); // 填写表单 await driver.findElement(By.id('username')).sendKeys('user@example.com'); await driver.findElement(By.id('password')).sendKeys('password123'); // 提交表单并等待导航 await Promise.all([ driver.wait(until.titleContains('Dashboard'), 5000), driver.findElement(By.id('login-button')).click() ]); // 获取用户信息 const userInfo = { name: await 
driver.findElement(By.css('.user-name')).getText(), email: await driver.findElement(By.css('.user-email')).getText() }; console.log(userInfo); await driver.quit();})();10. 选择建议选择 Puppeteer 如果:主要使用 Chrome/Chromium需要高性能和快速执行需要网络拦截或性能分析项目规模较小或中等团队熟悉 Node.js需要生成截图或 PDF选择 Selenium 如果:需要支持多种浏览器需要跨浏览器兼容性测试项目规模较大或企业级需要分布式测试环境需要测试移动应用团队已有 Selenium 经验11. 混合使用策略在某些项目中,可以结合两者的优势:// 使用 Puppeteer 进行快速开发和测试const puppeteer = require('puppeteer');// 使用 Selenium 进行跨浏览器验证const { Builder, By } = require('selenium-webdriver');async function testWithPuppeteer() { // 快速测试主要功能}async function testWithSelenium() { // 跨浏览器兼容性测试}总结:Puppeteer 和 Selenium 各有优势,选择哪个工具取决于项目需求、团队技能和测试场景。Puppeteer 更适合现代 Web 应用和快速开发,而 Selenium 更适合需要跨浏览器支持的企业级测试框架。
阅读 0·2月19日 19:47

Puppeteer 如何使用 Chrome DevTools Protocol (CDP) 进行高级调试和性能分析?

Puppeteer 提供了丰富的 Chrome DevTools Protocol (CDP) 功能,允许开发者访问浏览器底层的调试和性能分析能力。1. CDP 基础创建 CDP 会话:const client = await page.target().createCDPSession();启用 CDP 域:await client.send('Performance.enable');await client.send('Network.enable');await client.send('Runtime.enable');发送 CDP 命令:const result = await client.send('Performance.getMetrics');console.log(result);监听 CDP 事件:client.on('Network.requestWillBeSent', (params) => { console.log('Request:', params.request.url);});2. 性能监控启用性能监控:const client = await page.target().createCDPSession();await client.send('Performance.enable');获取性能指标:const metrics = await client.send('Performance.getMetrics');console.log('Performance Metrics:', metrics.metrics);关键性能指标:const metrics = await client.send('Performance.getMetrics');const metricMap = {};metrics.metrics.forEach(m => metricMap[m.name] = m.value);console.log({ Timestamp: metricMap.Timestamp, Documents: metricMap.Documents, Frames: metricMap.Frames, JSEventListeners: metricMap.JSEventListeners, Nodes: metricMap.Nodes, LayoutCount: metricMap.LayoutCount, RecalcStyleCount: metricMap.RecalcStyleCount, LayoutDuration: metricMap.LayoutDuration, RecalcStyleDuration: metricMap.RecalcStyleDuration, ScriptDuration: metricMap.ScriptDuration, TaskDuration: metricMap.TaskDuration});性能追踪:// 开始追踪await client.send('Performance.enable');await client.send('Tracing.start', { traceConfig: { includedCategories: ['devtools.timeline', 'blink.user_timing'] }});// 执行操作await page.goto('https://example.com');// 停止追踪const traceData = await client.send('Tracing.stop');3. 
网络监控启用网络监控:const client = await page.target().createCDPSession();await client.send('Network.enable');监控网络请求:client.on('Network.requestWillBeSent', (params) => { console.log('Request:', { url: params.request.url, method: params.request.method, type: params.type });});监控网络响应:client.on('Network.responseReceived', (params) => { console.log('Response:', { url: params.response.url, status: params.response.status, mimeType: params.response.mimeType });});获取请求体:client.on('Network.requestWillBeSent', async (params) => { if (params.request.postData) { console.log('Request body:', params.request.postData); }});获取响应体:client.on('Network.responseReceived', async (params) => { const responseBody = await client.send('Network.getResponseBody', { requestId: params.requestId }); console.log('Response body:', responseBody.body);});4. 运行时调试启用运行时监控:const client = await page.target().createCDPSession();await client.send('Runtime.enable');执行 JavaScript:const result = await client.send('Runtime.evaluate', { expression: 'document.title'});console.log('Result:', result.result.value);获取控制台日志:client.on('Runtime.consoleAPICalled', (params) => { console.log('Console:', params.type, params.args);});监听异常:client.on('Runtime.exceptionThrown', (params) => { console.error('Exception:', params.exceptionDetails);});5. DOM 监控启用 DOM 监控:const client = await page.target().createCDPSession();await client.send('DOM.enable');获取文档根节点:const root = await client.send('DOM.getDocument');console.log('Root node:', root.root);查询节点:const result = await client.send('DOM.querySelector', { nodeId: root.root.nodeId, selector: '.my-element'});console.log('Node:', result.nodeId);获取节点属性:const attributes = await client.send('DOM.getAttributes', { nodeId: result.nodeId});console.log('Attributes:', attributes.attributes);6. 
Page 监控启用 Page 监控:const client = await page.target().createCDPSession();await client.send('Page.enable');监听页面加载:client.on('Page.loadEventFired', () => { console.log('Page loaded');});监听导航:client.on('Page.frameNavigated', (params) => { console.log('Navigated to:', params.frame.url);});获取页面资源树:const resourceTree = await client.send('Page.getResourceTree');console.log('Resource tree:', resourceTree);7. 实际应用场景场景 1:性能分析工具async function analyzePerformance(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const client = await page.target().createCDPSession(); // 启用性能监控 await client.send('Performance.enable'); await client.send('Network.enable'); const startTime = Date.now(); await page.goto(url, { waitUntil: 'networkidle2' }); const loadTime = Date.now() - startTime; // 获取性能指标 const metrics = await client.send('Performance.getMetrics'); const metricMap = {}; metrics.metrics.forEach(m => metricMap[m.name] = m.value); // 收集网络数据 const networkData = []; client.on('Network.requestWillBeSent', (params) => { networkData.push({ url: params.request.url, method: params.request.method, timestamp: params.timestamp }); }); const report = { url, loadTime, metrics: { layoutDuration: metricMap.LayoutDuration, recalcStyleDuration: metricMap.RecalcStyleDuration, scriptDuration: metricMap.ScriptDuration, taskDuration: metricMap.TaskDuration }, networkRequests: networkData.length }; await browser.close(); return report;}analyzePerformance('https://example.com').then(console.log);场景 2:网络请求分析async function analyzeNetworkRequests(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const client = await page.target().createCDPSession(); await client.send('Network.enable'); const requests = []; client.on('Network.requestWillBeSent', (params) => { requests.push({ requestId: params.requestId, url: params.request.url, method: params.request.method, type: params.type, timestamp: params.timestamp }); }); 
client.on('Network.responseReceived', (params) => { const request = requests.find(r => r.requestId === params.requestId); if (request) { request.status = params.response.status; request.mimeType = params.response.mimeType; request.size = params.response.encodedDataLength; } }); await page.goto(url, { waitUntil: 'networkidle2' }); // 分析请求 const analysis = { totalRequests: requests.length, byType: {}, byStatus: {}, totalSize: 0 }; requests.forEach(req => { // 按类型统计 if (!analysis.byType[req.type]) { analysis.byType[req.type] = { count: 0, size: 0 }; } analysis.byType[req.type].count++; analysis.byType[req.type].size += req.size || 0; // 按状态码统计 if (!analysis.byStatus[req.status]) { analysis.byStatus[req.status] = 0; } analysis.byStatus[req.status]++; analysis.totalSize += req.size || 0; }); await browser.close(); return analysis;}analyzeNetworkRequests('https://example.com').then(console.log);场景 3:内存分析async function analyzeMemory(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const client = await page.target().createCDPSession(); await client.send('Runtime.enable'); await client.send('HeapProfiler.enable'); await page.goto(url, { waitUntil: 'networkidle2' }); // 获取堆快照 const heapSnapshot = await client.send('HeapProfiler.takeHeapSnapshot', { reportProgress: false }); // 获取内存使用情况 const memoryMetrics = await client.send('Runtime.getHeapUsage'); const report = { totalSize: memoryMetrics.totalSize, usedSize: memoryMetrics.usedSize, heapSnapshot: heapSnapshot }; await browser.close(); return report;}analyzeMemory('https://example.com').then(console.log);场景 4:JavaScript 执行分析async function analyzeJavaScript(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const client = await page.target().createCDPSession(); await client.send('Runtime.enable'); await client.send('Debugger.enable'); const consoleLogs = []; const exceptions = []; client.on('Runtime.consoleAPICalled', (params) => { consoleLogs.push({ 
type: params.type, args: params.args.map(arg => arg.value) }); }); client.on('Runtime.exceptionThrown', (params) => { exceptions.push({ message: params.exceptionDetails.exception?.description, stackTrace: params.exceptionDetails.stackTrace }); }); await page.goto(url, { waitUntil: 'networkidle2' }); const report = { consoleLogs, exceptions, hasErrors: exceptions.length > 0 }; await browser.close(); return report;}analyzeJavaScript('https://example.com').then(console.log);8. CDP 高级功能覆盖代码:const client = await page.target().createCDPSession();await client.send('DOM.enable');await client.send('CSS.enable');// 启用代码覆盖await client.send('Profiler.enable');await client.send('Profiler.startPreciseCoverage', { callCount: true, detailed: true});// 执行操作await page.goto('https://example.com');// 获取覆盖数据const coverage = await client.send('Profiler.takePreciseCoverage');console.log('Coverage:', coverage.result);监控长任务:const client = await page.target().createCDPSession();await client.send('Performance.enable');client.on('Performance.metrics', (params) => { params.metrics.forEach(metric => { if (metric.name === 'TaskDuration' && metric.value > 50) { console.warn('Long task detected:', metric.value, 'ms'); } });});监控布局抖动:const client = await page.target().createCDPSession();await client.send('Performance.enable');const layoutShifts = [];client.on('Performance.metrics', (params) => { params.metrics.forEach(metric => { if (metric.name === 'LayoutShift') { layoutShifts.push(metric.value); } });});// 计算累积布局偏移const cls = layoutShifts.reduce((sum, shift) => sum + shift, 0);console.log('Cumulative Layout Shift:', cls);9. 最佳实践1. 及时禁用 CDP 域:try { await client.send('Performance.enable'); // 操作} finally { await client.send('Performance.disable');}2. 批量获取数据:// 一次性获取多个指标const [metrics, networkData] = await Promise.all([ client.send('Performance.getMetrics'), client.send('Network.getResponseBody', { requestId: 'xxx' })]);3. 
使用事件过滤:client.on('Network.requestWillBeSent', (params) => { // 只处理特定请求 if (params.request.url.includes('/api/')) { console.log('API Request:', params.request.url); }});4. 错误处理:try { await client.send('Performance.getMetrics');} catch (error) { console.error('CDP error:', error); // 降级处理}
阅读 0·2月19日 19:40

Puppeteer 中有哪些等待机制?如何正确使用它们来处理异步操作?

Puppeteer 提供了多种等待机制来处理异步操作和页面加载,确保在执行操作前页面状态已就绪。1. page.waitForNavigation()等待页面导航完成,适用于点击链接、提交表单等会触发页面跳转的操作。await Promise.all([ page.waitForNavigation(), page.click('#submit-button')]);参数选项:waitUntil: 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2'timeout: 超时时间(毫秒)2. page.waitForSelector(selector)等待指定选择器出现在页面中。await page.waitForSelector('.result-item', { visible: true });参数选项:visible: 等待元素可见hidden: 等待元素隐藏timeout: 超时时间3. page.waitForXPath(xpath)等待 XPath 选择器匹配的元素(该方法在新版 Puppeteer 中已移除,可改用 page.waitForSelector('xpath/...'))。await page.waitForXPath('//div[@class="content"]');4. page.waitForFunction(pageFunction, options, ...args)等待自定义函数返回真值,最灵活的等待方式。await page.waitForFunction( () => document.querySelectorAll('.item').length > 5);// 带参数await page.waitForFunction( (count) => document.querySelectorAll('.item').length >= count, {}, 10);5. page.waitForTimeout(milliseconds)等待指定时间(已废弃,建议使用 setTimeout)。// 旧方法(已废弃)await page.waitForTimeout(1000);// 新方法await new Promise(resolve => setTimeout(resolve, 1000));6. page.waitForResponse(urlOrPredicate)等待特定的网络响应。// 等待特定 URL 的响应await page.waitForResponse('https://api.example.com/data');// 使用谓词函数await page.waitForResponse(response => response.url().includes('/api/') && response.status() === 200);7. page.waitForRequest(urlOrPredicate)等待特定的网络请求。await page.waitForRequest(request => request.url().includes('/api/data'));8. page.waitForFrame(urlOrPredicate)等待匹配指定 URL 或谓词函数的 frame 出现(参数是 URL 或谓词函数,而不是 iframe 的名称)。const frame = await page.waitForFrame(frame => frame.name() === 'iframe-name');最佳实践:1. 选择合适的等待方法:导航操作 → waitForNavigation元素操作 → waitForSelector复杂条件 → waitForFunctionAPI 调用 → waitForResponse2. 设置合理的超时时间:await page.waitForSelector('.element', { timeout: 5000 // 5 秒超时});3. 使用 Promise.all 并行等待:await Promise.all([ page.waitForNavigation(), page.click('#link'), page.waitForSelector('.loaded')]);4. 处理超时异常:try { await page.waitForSelector('.element', { timeout: 3000 });} catch (error) { console.log('Element not found within timeout');}5. 
优化等待策略:// 等待网络空闲(推荐)await page.waitForNavigation({ waitUntil: 'networkidle2' });// 等待特定元素可见await page.waitForSelector('.element', { visible: true });常见问题解决:问题 1:元素存在但不可见// 解决方案:等待元素可见await page.waitForSelector('.element', { visible: true });问题 2:动态加载内容// 解决方案:使用 waitForFunction 检查内容await page.waitForFunction(() => document.querySelectorAll('.item').length > 0);问题 3:SPA 路由变化// 解决方案:等待 URL 变化await page.waitForFunction(() => window.location.pathname === '/new-page');
阅读 0·2月19日 19:40

Puppeteer 如何进行错误处理和调试?有哪些常用的调试技巧和工具?

Puppeteer 提供了多种错误处理和调试技巧,帮助开发者快速定位和解决问题,提高开发效率。1. 基本错误处理try-catch 模式:const puppeteer = require('puppeteer');async function safeExecution() { const browser = await puppeteer.launch(); const page = await browser.newPage(); try { await page.goto('https://example.com'); await page.click('#button'); } catch (error) { console.error('Error occurred:', error.message); // 错误处理逻辑 } finally { await browser.close(); }}safeExecution();超时处理:try { await page.goto('https://example.com', { timeout: 5000 });} catch (error) { if (error.name === 'TimeoutError') { console.log('Page load timeout'); }}2. 调试模式启用调试模式:// 方法 1:使用 headless: falseconst browser = await puppeteer.launch({ headless: false, slowMo: 100 // 减慢操作速度});// 方法 2:使用 devtoolsconst browser = await puppeteer.launch({ headless: false, devtools: true});使用 slowMo:const browser = await puppeteer.launch({ headless: false, slowMo: 50 // 每个操作延迟 50ms});3. 日志记录控制台日志:page.on('console', msg => { console.log('Browser console:', msg.text());});// 捕获不同类型的日志page.on('console', msg => { const type = msg.type(); const text = msg.text(); if (type === 'error') { console.error('Browser error:', text); } else if (type === 'warning') { console.warn('Browser warning:', text); } else { console.log('Browser log:', text); }});页面错误日志:page.on('pageerror', error => { console.error('Page error:', error.message);});请求失败日志:page.on('requestfailed', request => { console.log('Request failed:', request.url()); console.log('Failure:', request.failure());});4. 
截图和视频录制错误时截图:async function withErrorScreenshot(page, operation) { try { await operation(); } catch (error) { const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); await page.screenshot({ path: `error-${timestamp}.png`, fullPage: true }); throw error; }}// 使用示例await withErrorScreenshot(page, async () => { await page.goto('https://example.com'); await page.click('#button');});视频录制:const { spawn } = require('child_process');async function recordVideo(page, outputPath, operation) { // 使用 ffmpeg 录制屏幕 const ffmpeg = spawn('ffmpeg', [ '-f', 'x11grab', '-r', '30', '-s', '1920x1080', '-i', ':99', '-c:v', 'libx264', '-preset', 'ultrafast', outputPath ]); try { await operation(); } finally { ffmpeg.kill('SIGINT'); }}5. 网络调试监控网络请求:page.on('request', request => { console.log('Request:', request.url());});page.on('response', response => { console.log('Response:', response.url(), response.status());});page.on('requestfinished', request => { console.log('Request finished:', request.url());});捕获请求和响应数据:const requests = [];page.on('request', request => { requests.push({ url: request.url(), method: request.method(), headers: request.headers() });});page.on('response', async response => { const request = requests.find(r => r.url === response.url()); if (request) { request.status = response.status(); request.headers = response.headers(); try { request.body = await response.text(); } catch (error) { request.body = null; } }});6. 性能追踪启用性能追踪:const client = await page.target().createCDPSession();await client.send('Performance.enable');await client.send('Network.enable');// 获取性能指标const metrics = await client.send('Performance.getMetrics');console.log('Performance metrics:', metrics);追踪时间线:await page.tracing.start({ path: 'trace.json' });// 执行操作await page.goto('https://example.com');await page.tracing.stop();7. 
元素调试高亮元素:async function highlightElement(page, selector) { await page.evaluate(selector => { const element = document.querySelector(selector); if (element) { element.style.border = '3px solid red'; element.style.backgroundColor = 'yellow'; } }, selector);}检查元素状态:async function checkElement(page, selector) { // Puppeteer 的 page 上没有 isVisible / isDisabled / isClickable,需通过元素句柄自行判断 const element = await page.$(selector); if (!element) { console.log('Element state:', { selector, exists: false }); return; } const isVisible = (await element.boundingBox()) !== null; const isEnabled = await element.evaluate(el => !el.disabled); const isClickable = isVisible && isEnabled; console.log('Element state:', { selector, isVisible, isEnabled, isClickable });}获取元素位置:const position = await page.evaluate(selector => { const element = document.querySelector(selector); if (element) { const rect = element.getBoundingClientRect(); return { x: rect.left, y: rect.top, width: rect.width, height: rect.height }; }}, '.element');8. 调试工具函数等待并调试:async function waitForAndDebug(page, selector, options = {}) { console.log(`Waiting for selector: ${selector}`); try { await page.waitForSelector(selector, { timeout: options.timeout || 30000, visible: options.visible !== false }); console.log(`Found selector: ${selector}`); } catch (error) { console.error(`Failed to find selector: ${selector}`); await page.screenshot({ path: 'debug-failed.png' }); throw error; }}点击并调试:async function clickAndDebug(page, selector) { console.log(`Attempting to click: ${selector}`); try { // 检查元素是否存在 const element = await page.$(selector); if (!element) { throw new Error(`Element not found: ${selector}`); } // 检查元素是否可见 const isVisible = await element.isIntersectingViewport(); if (!isVisible) { console.warn('Element is not visible, scrolling to it'); await element.scrollIntoView(); } await element.click(); console.log(`Successfully clicked: ${selector}`); } catch (error) { console.error(`Failed to click: ${selector}`, error); await page.screenshot({ path: 'debug-click-failed.png' }); throw error; }}9. 
常见错误及解决方案错误 1:元素未找到// 问题:元素选择器错误await page.click('.wrong-selector');// 解决方案:使用正确的选择器await page.click('.correct-selector');// 或者等待元素出现await page.waitForSelector('.correct-selector');await page.click('.correct-selector');错误 2:元素不可点击// 问题:元素被遮挡或不可见await page.click('.hidden-button');// 解决方案:滚动到元素await page.evaluate(selector => { document.querySelector(selector).scrollIntoView();}, '.hidden-button');await page.click('.hidden-button');错误 3:超时错误// 问题:页面加载超时await page.goto('https://slow-website.com');// 解决方案:增加超时时间await page.goto('https://slow-website.com', { timeout: 60000 });// 或使用更宽松的等待条件await page.goto('https://slow-website.com', { waitUntil: 'domcontentloaded' });错误 4:内存泄漏// 问题:未关闭浏览器实例const browser = await puppeteer.launch();// 忘记关闭// 解决方案:使用 finally 确保关闭const browser = await puppeteer.launch();try { // 操作} finally { await browser.close();}10. 调试最佳实践1. 使用描述性日志:console.log(`[INFO] Navigating to ${url}`);console.log(`[DEBUG] Found ${elements.length} elements`);console.log(`[ERROR] Failed to click button: ${error.message}`);2. 保存调试信息:const debugInfo = { url: page.url(), timestamp: new Date().toISOString(), screenshot: await page.screenshot({ encoding: 'base64' }), html: await page.content(), cookies: await page.cookies()};require('fs').writeFileSync('debug.json', JSON.stringify(debugInfo, null, 2));3. 使用条件断点:await page.evaluate(() => { debugger; // 在浏览器中暂停});4. 分步调试:// 使用 slowMo 减慢操作const browser = await puppeteer.launch({ slowMo: 100 });// 或在关键步骤添加延迟await new Promise(resolve => setTimeout(resolve, 1000));5. 使用调试器:// 在代码中添加 debuggerdebugger;// 使用 Node.js 调试器运行node --inspect-brk script.js11. 
测试和验证单元测试示例:const assert = require('assert');async function testPageLoad() { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto('https://example.com'); const title = await page.title(); assert.strictEqual(title, 'Example Domain'); await browser.close();}testPageLoad().catch(console.error);集成测试示例:async function testUserFlow() { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 测试登录流程 await page.goto('https://example.com/login'); await page.type('#username', 'testuser'); await page.type('#password', 'password'); await page.click('#login-button'); // 验证登录成功 await page.waitForSelector('.user-profile'); const isLoggedIn = await page.$('.user-profile') !== null; assert(isLoggedIn, 'Login failed'); await browser.close();}testUserFlow().catch(console.error);
阅读 0·2月19日 19:40

Puppeteer 如何管理 Cookie 和存储?如何实现会话持久化和多账户管理?

Puppeteer 提供了强大的 Cookie 和存储管理功能,可以模拟真实的用户会话、保持登录状态、管理本地存储等。1. Cookie 管理获取所有 Cookie:const cookies = await page.cookies();console.log(cookies);获取特定 URL 的 Cookie:const cookies = await page.cookies('https://example.com');设置 Cookie:await page.setCookie({ name: 'session_id', value: 'abc123', domain: '.example.com', path: '/', expires: Math.floor(Date.now() / 1000) + 3600, // 1 小时后过期 httpOnly: true, secure: true, sameSite: 'Lax'});设置多个 Cookie:await page.setCookie( { name: 'cookie1', value: 'value1', domain: '.example.com' }, { name: 'cookie2', value: 'value2', domain: '.example.com' });删除 Cookie:// 删除指定 Cookieawait page.deleteCookie({ name: 'session_id', domain: '.example.com' });// 删除所有 Cookieconst cookies = await page.cookies();await page.deleteCookie(...cookies);清除所有 Cookie:await page.evaluate(() => { document.cookie.split(";").forEach(c => { document.cookie = c.replace(/^ +/, "").replace(/=.*/, "=;expires=" + new Date().toUTCString() + ";path=/"); });});2. LocalStorage 管理获取 LocalStorage 数据:const localStorageData = await page.evaluate(() => { const data = {}; for (let i = 0; i < localStorage.length; i++) { const key = localStorage.key(i); data[key] = localStorage.getItem(key); } return data;});设置 LocalStorage 数据:await page.evaluate(() => { localStorage.setItem('user_id', '12345'); localStorage.setItem('preferences', JSON.stringify({ theme: 'dark' }));});获取特定 LocalStorage 项:const userId = await page.evaluate(() => { return localStorage.getItem('user_id');});删除 LocalStorage 项:await page.evaluate(() => { localStorage.removeItem('user_id');});清除所有 LocalStorage:await page.evaluate(() => { localStorage.clear();});3. 
SessionStorage 管理获取 SessionStorage 数据:const sessionStorageData = await page.evaluate(() => { const data = {}; for (let i = 0; i < sessionStorage.length; i++) { const key = sessionStorage.key(i); data[key] = sessionStorage.getItem(key); } return data;});设置 SessionStorage 数据:await page.evaluate(() => { sessionStorage.setItem('temp_data', 'temporary_value');});清除所有 SessionStorage:await page.evaluate(() => { sessionStorage.clear();});4. IndexedDB 管理获取 IndexedDB 数据:const indexedDBData = await page.evaluate(async () => { return new Promise((resolve, reject) => { const request = indexedDB.open('myDatabase', 1); request.onsuccess = (event) => { const db = event.target.result; const transaction = db.transaction(['myStore'], 'readonly'); const store = transaction.objectStore('myStore'); const getAllRequest = store.getAll(); getAllRequest.onsuccess = () => { resolve(getAllRequest.result); }; getAllRequest.onerror = () => { reject(getAllRequest.error); }; }; request.onerror = () => { reject(request.error); }; });});5. 浏览器上下文和隔离使用 Incognito 上下文:const context = await browser.createIncognitoBrowserContext();const page = await context.newPage();// 在隔离环境中操作await page.goto('https://example.com');// 关闭上下文,清除所有数据await context.close();多个隔离上下文:// 创建多个隔离的上下文const context1 = await browser.createIncognitoBrowserContext();const context2 = await browser.createIncognitoBrowserContext();const page1 = await context1.newPage();const page2 = await context2.newPage();// 两个上下文的 Cookie 和存储完全隔离6. 
会话持久化保存会话状态:async function saveSession(page, filePath) { const cookies = await page.cookies(); const localStorage = await page.evaluate(() => { const data = {}; for (let i = 0; i < localStorage.length; i++) { const key = localStorage.key(i); data[key] = localStorage.getItem(key); } return data; }); const session = { cookies, localStorage, url: page.url(), timestamp: Date.now() }; const fs = require('fs'); fs.writeFileSync(filePath, JSON.stringify(session, null, 2));}恢复会话状态:async function restoreSession(page, filePath) { const fs = require('fs'); const session = JSON.parse(fs.readFileSync(filePath, 'utf8')); // 恢复 Cookie await page.setCookie(...session.cookies); // 恢复 LocalStorage await page.evaluate((data) => { for (const [key, value] of Object.entries(data)) { localStorage.setItem(key, value); } }, session.localStorage); // 导航到之前的 URL await page.goto(session.url);}7. 实际应用场景场景 1:保持登录状态async function loginAndSaveSession() { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 登录 await page.goto('https://example.com/login'); await page.type('#username', 'user@example.com'); await page.type('#password', 'password'); await page.click('#login-button'); await page.waitForNavigation(); // 保存会话 await saveSession(page, 'session.json'); await browser.close();}async function useSavedSession() { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 恢复会话 await restoreSession(page, 'session.json'); // 直接访问需要登录的页面 await page.goto('https://example.com/dashboard'); // 验证是否已登录 const isLoggedIn = await page.$('.user-profile') !== null; console.log('Is logged in:', isLoggedIn); await browser.close();}场景 2:多账户管理async function manageMultipleAccounts(accounts) { const browser = await puppeteer.launch(); for (const account of accounts) { // 为每个账户创建隔离的上下文 const context = await browser.createIncognitoBrowserContext(); const page = await context.newPage(); // 登录账户 await page.goto('https://example.com/login'); await 
page.type('#username', account.username); await page.type('#password', account.password); await page.click('#login-button'); await page.waitForNavigation(); // 执行账户操作 await page.goto('https://example.com/dashboard'); const data = await page.evaluate(() => { return document.querySelector('.user-data').textContent; }); console.log(`Account ${account.username}: ${data}`); // 关闭上下文,清除数据 await context.close(); } await browser.close();}manageMultipleAccounts([ { username: 'user1@example.com', password: 'pass1' }, { username: 'user2@example.com', password: 'pass2' }]);场景 3:A/B 测试async function abTesting(url, variants) { const browser = await puppeteer.launch(); for (const variant of variants) { const context = await browser.createIncognitoBrowserContext(); const page = await context.newPage(); // 设置 A/B 测试 Cookie await page.setCookie({ name: 'ab_test_variant', value: variant.id, domain: new URL(url).hostname }); await page.goto(url); // 收集数据 const data = await page.evaluate(() => { return { title: document.title, content: document.querySelector('.content')?.textContent }; }); console.log(`Variant ${variant.id}:`, data); await context.close(); } await browser.close();}abTesting('https://example.com', [ { id: 'A' }, { id: 'B' }]);场景 4:购物车持久化async function saveShoppingCart(page, userId) { const cartData = await page.evaluate(() => { return JSON.parse(localStorage.getItem('cart') || '[]'); }); const fs = require('fs'); const filePath = `carts/${userId}.json`; fs.writeFileSync(filePath, JSON.stringify(cartData, null, 2));}async function restoreShoppingCart(page, userId) { const fs = require('fs'); const filePath = `carts/${userId}.json`; if (fs.existsSync(filePath)) { const cartData = JSON.parse(fs.readFileSync(filePath, 'utf8')); await page.evaluate((data) => { localStorage.setItem('cart', JSON.stringify(data)); }, cartData); }}8. 安全注意事项1. 敏感数据保护:// 不要在代码中硬编码敏感信息// 使用环境变量const password = process.env.PASSWORD;// 不要将包含敏感信息的会话文件提交到版本控制// 将 session.json 添加到 .gitignore2. 
Cookie 安全:// 设置安全的 Cookie 属性await page.setCookie({ name: 'session', value: 'value', httpOnly: true, // 防止 XSS 攻击 secure: true, // 仅通过 HTTPS 传输 sameSite: 'Strict' // 防止 CSRF 攻击});3. 会话过期处理:async function checkSessionValidity(page) { const cookies = await page.cookies(); const sessionCookie = cookies.find(c => c.name === 'session_id'); if (!sessionCookie || sessionCookie.expires * 1000 < Date.now()) { // 会话已过期,重新登录 await relogin(page); }}9. 最佳实践1. 使用隔离上下文:// 为每个用户或会话创建隔离的上下文const context = await browser.createIncognitoBrowserContext();const page = await context.newPage();// 操作完成后关闭上下文await context.close();2. 定期清理:// 定期清理过期的 Cookie 和存储async function cleanupStorage(page) { const cookies = await page.cookies(); const validCookies = cookies.filter(c => !c.expires || c.expires * 1000 > Date.now() ); await page.deleteCookie(...cookies); await page.setCookie(...validCookies);}3. 错误处理:try { await page.setCookie(cookie);} catch (error) { console.error('Failed to set cookie:', error); // 处理错误}4. 性能优化:// 批量操作 Cookieawait page.setCookie(...cookies);// 避免频繁的存储操作const data = await page.evaluate(() => { // 一次性获取所有需要的数据 return { localStorage: { ...localStorage }, sessionStorage: { ...sessionStorage } };});
阅读 0·2月19日 19:39

Puppeteer 如何实现设备模拟和移动端测试?有哪些内置设备和自定义配置方法?

Puppeteer 提供了强大的设备模拟功能,可以模拟各种移动设备、屏幕尺寸、用户代理等,这对于响应式设计测试和移动端网页测试非常有用。1. 设备模拟基础Puppeteer 内置了多种常见设备的预设配置。const puppeteer = require('puppeteer');(async () => { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 模拟 iPhone 12 const iPhone = puppeteer.devices['iPhone 12']; await page.emulate(iPhone); await page.goto('https://example.com'); await page.screenshot({ path: 'iphone-12.png' }); await browser.close();})();2. 内置设备列表Puppeteer 提供了以下设备预设:iPhone 系列:iPhone 12 ProiPhone 12iPhone 11 ProiPhone 11iPhone XiPhone 8iPhone 8 PlusiPhone SEiPhone 7iPhone 7 PlusiPhone 6iPhone 6 PlusiPhone 5iPhone 4iPad 系列:iPad Pro 11iPad ProiPad MiniiPadAndroid 系列:Pixel 5Pixel 4Pixel 2Galaxy S5Galaxy Note IIINexus 10Nexus 7Nexus 6Nexus 5Nexus 4其他设备:Kindle Fire HDXBlackberry Z30Blackberry PlayBookNokia N9Nokia Lumia 5203. 查看设备配置const puppeteer = require('puppeteer');// 查看所有可用设备console.log(Object.keys(puppeteer.devices));// 查看特定设备的配置const iPhone = puppeteer.devices['iPhone 12'];console.log(iPhone);/*输出:{ name: 'iPhone 12', userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1', viewport: { width: 390, height: 844, deviceScaleFactor: 3, isMobile: true, hasTouch: true, isLandscape: false }}*/4. 自定义设备配置如果内置设备不满足需求,可以创建自定义配置。const customDevice = { name: 'Custom Device', userAgent: 'Mozilla/5.0 (Custom Device) AppleWebKit/537.36', viewport: { width: 414, height: 896, deviceScaleFactor: 2, isMobile: true, hasTouch: true, isLandscape: false }};await page.emulate(customDevice);5. 手动设置视口可以单独设置视口参数而不使用完整设备模拟。// 设置视口大小await page.setViewport({ width: 1920, height: 1080, deviceScaleFactor: 1, isMobile: false, hasTouch: false, isLandscape: false});// 设置移动设备视口await page.setViewport({ width: 375, height: 667, deviceScaleFactor: 2, isMobile: true, hasTouch: true});6. 
用户代理设置单独设置用户代理字符串。// 设置自定义用户代理await page.setUserAgent('Mozilla/5.0 (Custom User Agent) AppleWebKit/537.36');// 设置移动设备用户代理await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15');7. 地理位置模拟模拟设备的地理位置。// 设置地理位置await page.setGeolocation({ latitude: 35.6895, longitude: 139.6917});// 授予地理位置权限await context.overridePermissions('https://example.com', ['geolocation']);// 使用示例const browser = await puppeteer.launch();const context = await browser.createIncognitoBrowserContext();const page = await context.newPage();await page.setGeolocation({ latitude: 35.6895, longitude: 139.6917 });await context.overridePermissions('https://example.com', ['geolocation']);await page.goto('https://example.com');8. 时区模拟模拟不同的时区。// 设置时区await page.emulateTimezone('Asia/Shanghai');// 其他时区示例await page.emulateTimezone('America/New_York');await page.emulateTimezone('Europe/London');await page.emulateTimezone('UTC');9. 语言和区域设置设置浏览器的语言和区域。// 设置语言await page.setExtraHTTPHeaders({ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'});// 在启动时设置const browser = await puppeteer.launch({ args: ['--lang=zh-CN']});10. 触摸事件模拟模拟触摸设备的行为。const touchDevice = { viewport: { width: 375, height: 667, isMobile: true, hasTouch: true }};await page.setViewport(touchDevice.viewport);// 模拟触摸点击await page.tap('#button');// 模拟触摸滑动await page.touchscreen.tap(100, 100);11. 
实际应用场景场景 1:响应式设计测试async function testResponsiveDesign(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const viewports = [ { name: 'Desktop', width: 1920, height: 1080 }, { name: 'Tablet', width: 768, height: 1024 }, { name: 'Mobile', width: 375, height: 667 } ]; for (const viewport of viewports) { await page.setViewport({ width: viewport.width, height: viewport.height }); await page.goto(url, { waitUntil: 'networkidle2' }); // 检查布局是否正确 const isLayoutCorrect = await page.evaluate(() => { const header = document.querySelector('header'); return header && header.offsetWidth <= viewport.width; }); console.log(`${viewport.name}: ${isLayoutCorrect ? '✓' : '✗'}`); await page.screenshot({ path: `${viewport.name.toLowerCase()}.png` }); } await browser.close();}testResponsiveDesign('https://example.com');场景 2:多设备兼容性测试async function testMultiDeviceCompatibility(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const devices = [ 'iPhone 12', 'iPad Pro', 'Pixel 5' ]; for (const deviceName of devices) { const device = puppeteer.devices[deviceName]; await page.emulate(device); await page.goto(url, { waitUntil: 'networkidle2' }); // 检查功能是否正常 const isFunctional = await page.evaluate(() => { const buttons = document.querySelectorAll('button'); return buttons.length > 0 && buttons[0].click(); }); console.log(`${deviceName}: ${isFunctional ? 
'✓' : '✗'}`); await page.screenshot({ path: `${deviceName.replace(/\s/g, '_')}.png` }); } await browser.close();}testMultiDeviceCompatibility('https://example.com');场景 3:移动端用户体验测试async function testMobileUX(url) { const browser = await puppeteer.launch(); const context = await browser.createIncognitoBrowserContext(); const page = await context.newPage(); // 模拟移动设备 await page.emulate(puppeteer.devices['iPhone 12']); // 设置地理位置 await page.setGeolocation({ latitude: 35.6895, longitude: 139.6917 }); await context.overridePermissions(url, ['geolocation']); await page.goto(url, { waitUntil: 'networkidle2' }); // 测试触摸交互 await page.tap('#menu-button'); await page.waitForSelector('.menu', { visible: true }); // 测试滑动 await page.evaluate(() => { window.scrollBy(0, 500); }); // 检查响应速度 const startTime = Date.now(); await page.click('#action-button'); const responseTime = Date.now() - startTime; console.log(`Response time: ${responseTime}ms`); await browser.close();}testMobileUX('https://example.com');场景 4:国际化测试async function testInternationalization(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const locales = [ { code: 'zh-CN', language: '中文' }, { code: 'en-US', language: 'English' }, { code: 'ja-JP', language: '日本語' } ]; for (const locale of locales) { // 设置语言 await page.setExtraHTTPHeaders({ 'Accept-Language': locale.code }); // 设置时区 await page.emulateTimezone(locale.code === 'zh-CN' ? 'Asia/Shanghai' : locale.code === 'en-US' ? 'America/New_York' : 'Asia/Tokyo'); await page.goto(url, { waitUntil: 'networkidle2' }); // 检查语言是否正确 const pageLanguage = await page.evaluate(() => { return document.documentElement.lang; }); console.log(`${locale.language}: ${pageLanguage}`); await page.screenshot({ path: `${locale.code}.png` }); } await browser.close();}testInternationalization('https://example.com');12. 最佳实践1. 
使用内置设备预设:// 推荐await page.emulate(puppeteer.devices['iPhone 12']);// 不推荐(除非有特殊需求)await page.setViewport({ width: 390, height: 844 });2. 测试前重置状态:async function resetPage(page) { await page.emulate(puppeteer.devices['Desktop']); await page.setGeolocation({ latitude: 0, longitude: 0 }); await page.emulateTimezone('UTC');}3. 使用上下文隔离测试:const context = await browser.createIncognitoBrowserContext();const page = await context.newPage();// 测试代码await context.close();4. 记录测试结果:const testResults = [];for (const device of devices) { const result = await testDevice(device); testResults.push({ device, result });}console.log(JSON.stringify(testResults, null, 2));13. 注意事项:性能影响:设备模拟可能会影响性能,特别是在频繁切换时;缓存问题:不同设备可能需要清除缓存以获得准确结果;网络条件:考虑模拟不同的网络速度;真实设备差异:模拟可能与真实设备有细微差异;权限管理:确保正确设置地理位置等权限。
阅读 0·2月19日 19:39

Puppeteer 如何实现页面截图和 PDF 生成?有哪些高级选项和实际应用场景?

Puppeteer 提供了强大的页面截图和 PDF 生成功能,可以用于自动化测试、文档生成、网页归档等多种场景。1. 页面截图(Screenshots)基本截图:const puppeteer = require('puppeteer');(async () => { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto('https://example.com'); // 基本截图 await page.screenshot({ path: 'example.png' }); await browser.close();})();截图选项详解:await page.screenshot({ path: 'screenshot.png', // 保存路径 type: 'png', // 格式:'png' 或 'jpeg' quality: 90, // JPEG 质量(0-100),仅适用于 JPEG fullPage: true, // 截取整个页面(包括滚动部分) clip: { // 裁剪区域 x: 0, y: 0, width: 800, height: 600 }, omitBackground: false, // 是否省略白色背景(透明 PNG) encoding: 'base64', // 编码方式:'base64' 或 'binary' captureBeyondViewport: false // 是否捕获视口之外的内容});截取特定元素:const element = await page.$('#header');await element.screenshot({ path: 'header.png' });截取视口区域:await page.setViewport({ width: 1920, height: 1080 });await page.screenshot({ path: 'viewport.png' });全页截图(包括滚动内容):await page.screenshot({ path: 'fullpage.png', fullPage: true});高质量 JPEG 截图:await page.screenshot({ path: 'high-quality.jpg', type: 'jpeg', quality: 95});透明背景截图:await page.screenshot({ path: 'transparent.png', omitBackground: true});获取截图为 Base64:const base64 = await page.screenshot({ encoding: 'base64'});console.log(base64);2. 
PDF 生成基本 PDF 生成:await page.pdf({ path: 'page.pdf' });PDF 选项详解:await page.pdf({ path: 'output.pdf', // 保存路径 scale: 1, // 缩放比例 displayHeaderFooter: false, // 是否显示页眉页脚 headerTemplate: '', // 页眉 HTML 模板 footerTemplate: '', // 页脚 HTML 模板 printBackground: false, // 是否打印背景图形 landscape: false, // 是否横向打印 pageRanges: '', // 打印页码范围,如 '1-5, 8, 11-13' format: 'A4', // 纸张格式 width: '', // 纸张宽度,如 '10in' height: '', // 纸张高度,如 '20in' margin: { // 页边距 top: '1cm', right: '1cm', bottom: '1cm', left: '1cm' }, preferCSSPageSize: false // 是否使用 CSS 页面大小});支持的纸张格式:Letter: 8.5in x 11inLegal: 8.5in x 14inTabloid: 11in x 17inLedger: 17in x 11inA0: 33.1in x 46.8inA1: 23.4in x 33.1inA2: 16.5in x 23.4inA3: 11.7in x 16.5inA4: 8.27in x 11.7inA5: 5.83in x 8.27inA6: 4.13in x 5.83in横向 PDF:await page.pdf({ path: 'landscape.pdf', landscape: true, format: 'A4'});自定义纸张大小:await page.pdf({ path: 'custom.pdf', width: '200mm', height: '300mm'});设置页边距:await page.pdf({ path: 'margin.pdf', margin: { top: '20px', right: '20px', bottom: '20px', left: '20px' }});打印背景图形:await page.pdf({ path: 'background.pdf', printBackground: true});添加页眉页脚:await page.pdf({ path: 'header-footer.pdf', displayHeaderFooter: true, headerTemplate: ` <div style="font-size: 10px; text-align: center; width: 100%;"> Generated by Puppeteer </div> `, footerTemplate: ` <div style="font-size: 10px; text-align: center; width: 100%;"> Page <span class="pageNumber"></span> of <span class="totalPages"></span> </div> `});打印特定页面:await page.pdf({ path: 'pages.pdf', pageRanges: '1-3, 5, 8-10'});3. 
实际应用场景场景 1:网页归档async function archiveWebpage(url, outputPath) { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto(url, { waitUntil: 'networkidle2' }); // 生成 PDF 归档 await page.pdf({ path: outputPath, format: 'A4', printBackground: true, margin: { top: '1cm', right: '1cm', bottom: '1cm', left: '1cm' } }); await browser.close();}archiveWebpage('https://example.com', 'archive.pdf');场景 2:批量截图服务async function batchScreenshots(urls) { const browser = await puppeteer.launch(); const page = await browser.newPage(); for (const url of urls) { await page.goto(url, { waitUntil: 'networkidle2' }); const filename = url .replace(/https?:\/\//, '') .replace(/\//g, '_') + '.png'; await page.screenshot({ path: `screenshots/${filename}`, fullPage: true }); console.log(`Screenshot saved: ${filename}`); } await browser.close();}batchScreenshots([ 'https://example.com', 'https://example.com/about', 'https://example.com/contact']);场景 3:生成发票 PDFasync function generateInvoice(invoiceData) { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 加载发票模板 await page.setContent(` <html> <head> <style> body { font-family: Arial, sans-serif; padding: 40px; } .header { text-align: center; margin-bottom: 40px; } .invoice-info { margin-bottom: 30px; } table { width: 100%; border-collapse: collapse; } th, td { border: 1px solid #ddd; padding: 10px; text-align: left; } th { background-color: #f2f2f2; } .total { text-align: right; font-weight: bold; margin-top: 20px; } </style> </head> <body> <div class="header"> <h1>INVOICE</h1> <p>Invoice #: ${invoiceData.number}</p> </div> <div class="invoice-info"> <p>Date: ${invoiceData.date}</p> <p>Customer: ${invoiceData.customer}</p> </div> <table> <thead> <tr> <th>Item</th> <th>Quantity</th> <th>Price</th> <th>Total</th> </tr> </thead> <tbody> ${invoiceData.items.map(item => ` <tr> <td>${item.name}</td> <td>${item.quantity}</td> <td>$${item.price}</td> <td>$${item.quantity * 
item.price}</td> </tr> `).join('')} </tbody> </table> <div class="total"> Total: $${invoiceData.total} </div> </body> </html> `); // 生成 PDF await page.pdf({ path: `invoice_${invoiceData.number}.pdf`, format: 'A4', printBackground: true, margin: { top: '20px', right: '20px', bottom: '20px', left: '20px' } }); await browser.close();}generateInvoice({ number: 'INV-001', date: '2024-01-15', customer: 'John Doe', items: [ { name: 'Product A', quantity: 2, price: 50 }, { name: 'Product B', quantity: 1, price: 75 } ], total: 175});场景 4:响应式设计测试截图async function responsiveScreenshots(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); const viewports = [ { name: 'mobile', width: 375, height: 667 }, { name: 'tablet', width: 768, height: 1024 }, { name: 'desktop', width: 1920, height: 1080 } ]; for (const viewport of viewports) { await page.setViewport(viewport); await page.goto(url, { waitUntil: 'networkidle2' }); await page.screenshot({ path: `${viewport.name}.png`, fullPage: true }); console.log(`Screenshot saved: ${viewport.name}.png`); } await browser.close();}responsiveScreenshots('https://example.com');4. 性能优化建议1. 并行处理:const urls = ['url1', 'url2', 'url3'];const browser = await puppeteer.launch();await Promise.all(urls.map(async (url, index) => { const page = await browser.newPage(); await page.goto(url); await page.screenshot({ path: `screenshot-${index}.png` }); await page.close();}));await browser.close();2. 复用浏览器实例:const browser = await puppeteer.launch();// 多次使用同一个浏览器实例for (const url of urls) { const page = await browser.newPage(); await page.goto(url); await page.screenshot({ path: `${url}.png` }); await page.close();}await browser.close();3. 禁用不必要的资源:await page.setRequestInterception(true);page.on('request', (request) => { if (['image', 'font', 'media'].includes(request.resourceType())) { request.abort(); } else { request.continue(); }});5. 
注意事项:PDF 生成限制:PDF 生成功能仅在无头模式下可用;字体支持:确保系统安装了所需的字体;页面加载:使用 waitUntil: 'networkidle2' 确保页面完全加载;内存管理:处理大量页面时注意内存使用;错误处理:添加适当的错误处理逻辑;超时设置:根据页面复杂度调整超时时间。
阅读 0·2月19日 19:38

什么是 Puppeteer?它有哪些主要特性和应用场景?

Puppeteer 是一个 Node.js 库,它提供了一个高级 API 来通过 DevTools 协议控制无头 Chrome 或 Chromium。它还可以配置为使用完整(非无头)Chrome 或 Chromium。核心特性:无头浏览器控制:Puppeteer 可以在无头模式下运行 Chrome,这意味着浏览器界面不会显示,但所有功能仍然可用。页面操作:可以生成页面的屏幕截图和 PDF,抓取 SPA(单页应用)并进行内容爬取。自动化测试:可以模拟用户操作,如点击、输入文本、导航等,非常适合自动化测试。性能分析:可以捕获时间线跟踪,帮助诊断性能问题。网络拦截:可以拦截和修改网络请求,用于测试和调试。基本使用示例:const puppeteer = require('puppeteer');(async () => { // 启动浏览器 const browser = await puppeteer.launch(); // 创建新页面 const page = await browser.newPage(); // 导航到指定 URL await page.goto('https://example.com'); // 截图 await page.screenshot({ path: 'example.png' }); // 关闭浏览器 await browser.close();})();主要应用场景:网页爬虫:抓取动态渲染的网页内容;自动化测试:E2E 测试、UI 测试;PDF 生成:将网页转换为 PDF 文档;性能监控:分析页面加载性能;截图服务:批量生成网页截图。与 Selenium 的区别:Puppeteer 直接使用 Chrome DevTools 协议,速度更快;Selenium 支持多种浏览器,Puppeteer 主要支持 Chrome/Chromium;Puppeteer API 更简洁,学习曲线较低;Puppeteer 对现代 Web 技术支持更好。
阅读 0·2月19日 19:38

Puppeteer 的性能优化有哪些策略?如何提高爬虫效率和降低资源消耗?

Puppeteer 的性能优化对于提高爬虫效率、降低资源消耗和提升测试速度至关重要。以下是一些关键的优化策略和最佳实践。1. 浏览器启动优化使用合适的启动参数:const browser = await puppeteer.launch({ headless: 'new', // 使用新的无头模式(更快) args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', // 避免内存问题 '--disable-accelerated-2d-canvas', '--disable-gpu', '--window-size=1920,1080' ]});复用浏览器实例:// 不好的做法:每次任务都启动新浏览器async function badApproach(urls) { for (const url of urls) { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto(url); await browser.close(); }}// 好的做法:复用浏览器实例async function goodApproach(urls) { const browser = await puppeteer.launch(); for (const url of urls) { const page = await browser.newPage(); await page.goto(url); await page.close(); } await browser.close();}2. 页面加载优化优化 waitUntil 选项:// 根据需求选择合适的等待策略await page.goto(url, { waitUntil: 'domcontentloaded' // 最快,DOM 加载完成});await page.goto(url, { waitUntil: 'load' // 默认,所有资源加载完成});await page.goto(url, { waitUntil: 'networkidle0' // 500ms 内没有网络请求});await page.goto(url, { waitUntil: 'networkidle2' // 500ms 内不超过 2 个网络请求});禁用不必要的资源:await page.setRequestInterception(true);page.on('request', (request) => { const resourceType = request.resourceType(); // 阻止图片、字体、媒体等资源 if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) { request.abort(); } else { request.continue(); }});缓存策略:// 启用缓存await page.setCacheEnabled(true);// 禁用缓存(每次都重新加载)await page.setCacheEnabled(false);3. 
并发处理使用 Promise.all 并行处理:const urls = ['url1', 'url2', 'url3'];const browser = await puppeteer.launch();// 并行处理多个页面await Promise.all(urls.map(async (url) => { const page = await browser.newPage(); await page.goto(url); await page.screenshot({ path: `${url}.png` }); await page.close();}));await browser.close();控制并发数量:async function processWithConcurrency(urls, concurrency = 3) { const browser = await puppeteer.launch(); const results = []; for (let i = 0; i < urls.length; i += concurrency) { const batch = urls.slice(i, i + concurrency); const batchResults = await Promise.all( batch.map(async (url) => { const page = await browser.newPage(); await page.goto(url); const data = await page.evaluate(() => document.body.innerText); await page.close(); return data; }) ); results.push(...batchResults); } await browser.close(); return results;}4. 内存管理及时关闭页面:// 不好的做法:不关闭页面async function badMemoryUsage(urls) { const browser = await puppeteer.launch(); for (const url of urls) { const page = await browser.newPage(); await page.goto(url); // 没有关闭页面,内存会持续增长 } await browser.close();}// 好的做法:及时关闭页面async function goodMemoryUsage(urls) { const browser = await puppeteer.launch(); for (const url of urls) { const page = await browser.newPage(); await page.goto(url); await page.close(); // 及时关闭页面 } await browser.close();}使用上下文隔离:const context = await browser.createIncognitoBrowserContext();const page = await context.newPage();// 操作页面await context.close(); // 关闭上下文,清理所有资源清理 Cookie 和存储:// 清除 Cookieawait page.deleteCookie(...await page.cookies());// 清除所有存储await page.evaluate(() => { localStorage.clear(); sessionStorage.clear();});5. 
选择器优化使用高效的选择器:// 不好的做法:使用通用选择器const elements = await page.$$('div'); // 慢// 好的做法:使用具体的选择器const elements = await page.$$('.item'); // 快// 更好的做法:使用 ID 选择器const element = await page.$('#unique-id'); // 最快避免重复查询:// 不好的做法:重复查询const text1 = await page.$eval('.title', el => el.textContent);const text2 = await page.$eval('.title', el => el.textContent);// 好的做法:缓存元素const element = await page.$('.title');const text1 = await element.evaluate(el => el.textContent);const text2 = await element.evaluate(el => el.textContent);6. 网络优化使用 CDN 加速:// 如果有本地 Chromium,使用本地版本const browser = await puppeteer.launch({ executablePath: '/path/to/local/chrome'});设置超时时间:// 设置合理的超时时间await page.goto(url, { timeout: 30000 });await page.waitForSelector('.element', { timeout: 5000 });使用连接池:// 复用浏览器实例作为连接池class BrowserPool { constructor(size = 3) { this.size = size; this.browsers = []; this.queue = []; } async init() { for (let i = 0; i < this.size; i++) { this.browsers.push(await puppeteer.launch()); } } async getBrowser() { if (this.browsers.length > 0) { return this.browsers.pop(); } return new Promise(resolve => this.queue.push(resolve)); } releaseBrowser(browser) { if (this.queue.length > 0) { this.queue.shift()(browser); } else { this.browsers.push(browser); } }}7. 
实际优化案例案例 1:批量截图优化async function optimizedBatchScreenshots(urls) { const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox', '--disable-setuid-sandbox'] }); // 禁用不必要的资源 await page.setRequestInterception(true); page.on('request', (request) => { if (['image', 'font', 'media'].includes(request.resourceType())) { request.abort(); } else { request.continue(); } }); // 并行处理 await Promise.all(urls.map(async (url, index) => { const page = await browser.newPage(); await page.goto(url, { waitUntil: 'domcontentloaded' }); await page.screenshot({ path: `screenshot-${index}.png` }); await page.close(); })); await browser.close();}案例 2:数据抓取优化async function optimizedScraping(urls) { const browser = await puppeteer.launch({ headless: 'new', args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage' ] }); const results = []; for (const url of urls) { const page = await browser.newPage(); // 禁用图片加载 await page.setRequestInterception(true); page.on('request', (request) => { if (request.resourceType() === 'image') { request.abort(); } else { request.continue(); } }); // 快速加载 await page.goto(url, { waitUntil: 'domcontentloaded' }); // 批量获取数据 const data = await page.evaluate(() => { return Array.from(document.querySelectorAll('.item')).map(item => ({ title: item.querySelector('.title')?.textContent, price: item.querySelector('.price')?.textContent })); }); results.push(...data); await page.close(); } await browser.close(); return results;}案例 3:监控和性能分析async function monitorPerformance(url) { const browser = await puppeteer.launch(); const page = await browser.newPage(); // 启用性能监控 const client = await page.target().createCDPSession(); await client.send('Performance.enable'); const startTime = Date.now(); await page.goto(url, { waitUntil: 'networkidle2' }); const loadTime = Date.now() - startTime; // 获取性能指标 const metrics = await client.send('Performance.getMetrics'); console.log('Load time:', loadTime); console.log('Metrics:', metrics); await 
browser.close();}8. 性能监控工具使用 Chrome DevTools Protocol:const client = await page.target().createCDPSession();// 启用性能监控await client.send('Performance.enable');// 获取性能指标const metrics = await client.send('Performance.getMetrics');// 启用网络监控await client.send('Network.enable');// 监听网络事件client.on('Network.requestWillBeSent', (params) => { console.log('Request:', params.request.url);});使用 Puppeteer 的性能追踪:// 开始追踪await page.tracing.start({ path: 'trace.json' });// 执行操作await page.goto('https://example.com');// 停止追踪await page.tracing.stop();9. 最佳实践总结1. 启动优化:使用 headless: 'new' 模式添加合适的启动参数复用浏览器实例2. 加载优化:选择合适的 waitUntil 策略禁用不必要的资源使用缓存3. 并发优化:使用 Promise.all 并行处理控制并发数量使用连接池4. 内存优化:及时关闭页面和浏览器使用上下文隔离清理 Cookie 和存储5. 选择器优化:使用高效的选择器避免重复查询缓存元素引用6. 网络优化:设置合理的超时时间使用本地 Chromium优化网络请求10. 常见性能问题及解决方案问题 1:内存泄漏// 解决方案:及时清理资源async function fixMemoryLeak() { const browser = await puppeteer.launch(); try { // 操作代码 } finally { await browser.close(); }}问题 2:页面加载慢// 解决方案:优化加载策略await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000});问题 3:并发过高导致崩溃// 解决方案:限制并发数量const CONCURRENCY = 3;// 使用连接池或分批处理问题 4:CPU 使用率过高// 解决方案:禁用不必要的功能const browser = await puppeteer.launch({ args: [ '--disable-gpu', '--disable-dev-shm-usage' ]});
阅读 0·2月19日 19:38