Puppeteer provides rich page interaction and form manipulation capabilities, allowing you to simulate real user behaviors, which is very important for automated testing and web scraping.
1. Basic Page Operations
Navigate to Page:
javascript// Basic navigation await page.goto('https://example.com'); // Wait for network idle await page.goto('https://example.com', { waitUntil: 'networkidle2' }); // Set timeout await page.goto('https://example.com', { timeout: 30000 }); // Wait for specific conditions await page.goto('https://example.com', { waitUntil: ['load', 'domcontentloaded'] });
Refresh Page:
javascriptawait page.reload(); await page.reload({ waitUntil: 'networkidle2' });
Back and Forward:
javascriptawait page.goBack(); await page.goForward();
2. Element Selection
Puppeteer supports multiple selector methods.
Select Single Element with $:
// By CSS selector: page.$() resolves to the first matching element handle,
// or null when nothing matches.
// Each result gets its own name; redeclaring one `const` is a SyntaxError.
const byId = await page.$('#my-id');
const byClass = await page.$('.my-class');
const byChild = await page.$('div > p');

// By XPath: page.$x() resolves to an ARRAY of handles, so take the first entry.
const [byXPath] = await page.$x('//div[@class="my-class"]');
// NOTE: Puppeteer v22+ removed page.$x(); on those versions use the XPath
// pseudo-selector with page.$() instead:
//   const byXPath = await page.$('::-p-xpath(//div[@class="my-class"])');
Select Multiple Elements with $$:
javascript// Select all matching elements const elements = await page.$$('.item'); console.log(elements.length); // Number of elements // Iterate through elements for (const element of elements) { const text = await element.evaluate(el => el.textContent); console.log(text); }
Batch Data Retrieval with $$eval:
javascript// Get text of all elements const texts = await page.$$eval('.item', elements => { return elements.map(el => el.textContent); }); // Get attributes of all elements const hrefs = await page.$$eval('a', elements => { return elements.map(el => el.href); });
3. Click Operations
Basic Click:
javascriptawait page.click('#button'); await page.click('.submit-btn');
Click with Options:
javascriptawait page.click('#button', { button: 'left', // 'left', 'right', 'middle' clickCount: 1, // Number of clicks delay: 100, // Click delay (milliseconds) offset: { // Click position offset x: 10, y: 10 } });
Double Click:
javascriptawait page.click('#button', { clickCount: 2 });
Right Click:
javascriptawait page.click('#button', { button: 'right' });
Wait for Element to be Clickable:
javascriptawait page.waitForSelector('#button', { visible: true }); await page.click('#button');
4. Text Input
Basic Input:
javascriptawait page.type('#input', 'Hello World');
Input with Options:
// page.type() only understands `delay`; there is no `clear` option, so an
// unknown `clear: true` would be ignored and the text appended to whatever
// the field already contains. Empty the field explicitly first.
await page.click('#input', { clickCount: 3 }); // Triple-click selects the existing text
await page.keyboard.press('Backspace'); // Remove the selection

await page.type('#input', 'Hello World', {
  delay: 100, // Delay between characters (milliseconds)
});
Simulate Real Typing Speed:
javascriptawait page.type('#input', 'Hello World', { delay: 50 });
Clear Input Field:
javascriptawait page.click('#input'); await page.keyboard.down('Control'); await page.keyboard.press('A'); await page.keyboard.up('Control'); await page.keyboard.press('Backspace');
5. Keyboard Operations
Basic Keys:
javascriptawait page.keyboard.press('Enter'); await page.keyboard.press('Tab'); await page.keyboard.press('Escape'); await page.keyboard.press('Backspace');
Combination Keys:
javascript// Ctrl+C await page.keyboard.down('Control'); await page.keyboard.press('C'); await page.keyboard.up('Control'); // Ctrl+A (Select All) await page.keyboard.down('Control'); await page.keyboard.press('A'); await page.keyboard.up('Control'); // Ctrl+V (Paste) await page.keyboard.down('Control'); await page.keyboard.press('V'); await page.keyboard.up('Control');
Special Keys:
javascriptawait page.keyboard.press('ArrowUp'); await page.keyboard.press('ArrowDown'); await page.keyboard.press('ArrowLeft'); await page.keyboard.press('ArrowRight'); await page.keyboard.press('PageUp'); await page.keyboard.press('PageDown'); await page.keyboard.press('Home'); await page.keyboard.press('End');
6. Mouse Operations
Move Mouse:
javascriptawait page.mouse.move(100, 100); await page.mouse.move(100, 100, { steps: 10 }); // Smooth movement
Click Mouse:
javascriptawait page.mouse.click(100, 100); await page.mouse.click(100, 100, { button: 'left', clickCount: 1 });
Press and Release Mouse:
// Press and release the button at the current pointer position
await page.mouse.down();
await page.mouse.up();

// Drag operation
// mouse.down() accepts only { button, clickCount }, not coordinates, so the
// pointer has to be moved to the drag start point before pressing.
await page.mouse.move(100, 100);
await page.mouse.down();
await page.mouse.move(200, 200, { steps: 10 }); // Intermediate steps emit mousemove events along the path
await page.mouse.up();
7. Form Operations
Fill Form:
// Text input
await page.type('#name', 'John Doe');
await page.type('#email', 'john@example.com');

// Select dropdown
// Puppeteer's method is page.select(); selectOption() is a Playwright API.
await page.select('#country', 'CN');
await page.select('#country', 'CN', 'US'); // Multi-select: one argument per value (needs <select multiple>)

// Checkbox
await page.click('#checkbox');
const isChecked = await page.$eval('#checkbox', el => el.checked);

// Radio button
await page.click('#radio-male');

// File upload
// Puppeteer uploads through the element handle; setInputFiles() is a Playwright API.
const fileInput = await page.$('#file-upload');
await fileInput.uploadFile('/path/to/file.pdf');
await fileInput.uploadFile('/file1.pdf', '/file2.pdf'); // Several files: one argument per path (needs <input multiple>)
Submit Form:
javascript// Click submit button await page.click('#submit-button'); // Use form submit await page.evaluate(() => { document.querySelector('form').submit(); });
8. Scroll Operations
Scroll to Bottom of Page:
javascriptawait page.evaluate(() => { window.scrollTo(0, document.body.scrollHeight); });
Scroll to Specific Element:
javascriptawait page.evaluate(() => { document.querySelector('#target').scrollIntoView(); });
Smooth Scroll:
javascriptawait page.evaluate(() => { window.scrollTo({ top: 1000, behavior: 'smooth' }); });
Scroll by Specific Distance:
javascriptawait page.evaluate(() => { window.scrollBy(0, 500); });
9. Wait for Elements
Wait for Element to Appear:
javascriptawait page.waitForSelector('.result'); await page.waitForSelector('.result', { visible: true }); await page.waitForSelector('.result', { hidden: true });
Wait for XPath:
javascriptawait page.waitForXPath('//div[@class="result"]');
Wait for Function:
javascriptawait page.waitForFunction(() => { return document.querySelectorAll('.item').length > 5; });
Wait for Navigation:
javascriptawait Promise.all([ page.waitForNavigation(), page.click('#link') ]);
10. Get Element Information
Get Text Content:
javascriptconst text = await page.$eval('.title', el => el.textContent);
Get Attribute:
javascriptconst href = await page.$eval('a', el => el.href); const id = await page.$eval('div', el => el.id);
Get Multiple Elements Information:
javascriptconst texts = await page.$$eval('.item', elements => { return elements.map(el => el.textContent); });
Check if Element Exists:
javascriptconst exists = await page.$('.element') !== null;
Check if Element is Visible:
javascriptconst isVisible = await page.$eval('.element', el => { return el.offsetParent !== null; });
11. Practical Use Cases
Use Case 1: Login Form Filling
/**
 * Opens the login page at `url`, submits the credentials and reports whether
 * the login succeeded.
 *
 * @param {string} url - Address of the login page.
 * @param {string} username - Text typed into the `#username` field.
 * @param {string} password - Text typed into the `#password` field.
 * @returns {Promise<boolean>} `true` when a `.user-profile` element exists
 *   after the post-login navigation, `false` otherwise.
 */
async function login(url, username, password) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Fill login form
    await page.type('#username', username);
    await page.type('#password', password);

    // Start waiting for the navigation before the click is issued so the
    // navigation event cannot be missed.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#login-button'),
    ]);

    // Verify login success
    const profile = await page.$('.user-profile');
    return profile !== null;
  } finally {
    // Close the browser even when a step above throws, so no browser
    // process is left running.
    await browser.close();
  }
}

// Example invocation; the rejection handler keeps the promise from floating.
login('https://example.com/login', 'user@example.com', 'password')
  .catch((error) => console.error('Login failed:', error));
Use Case 2: Search Functionality Testing
/**
 * Runs a search on the page at `url` and counts the results shown.
 *
 * @param {string} url - Page that contains the `#search-input` field.
 * @param {string} query - Text to search for.
 * @returns {Promise<number>} Number of `.search-result` elements on the
 *   results page.
 */
async function testSearch(url, query) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Enter search query
    await page.type('#search-input', query);

    // Submit search; the navigation wait is registered before Enter is pressed
    await Promise.all([
      page.waitForNavigation(),
      page.keyboard.press('Enter'),
    ]);

    // Wait for search results
    await page.waitForSelector('.search-result');

    // Get result count
    const resultCount = await page.$$eval('.search-result', (results) => results.length);
    console.log(`Found ${resultCount} results for "${query}"`);
    return resultCount;
  } finally {
    // Close the browser on success and on failure alike
    await browser.close();
  }
}

// Example invocation; the rejection handler keeps the promise from floating.
testSearch('https://example.com', 'puppeteer')
  .catch((error) => console.error('Search test failed:', error));
Use Case 3: Paginated Data Scraping
/**
 * Scrapes every page of a paginated listing, following `?page=N` until the
 * page no longer shows an enabled "next page" control.
 *
 * @param {string} url - Absolute URL of the listing; any query parameters it
 *   already carries are preserved.
 * @returns {Promise<Array<{title: (string|null), price: (string|null)}>>}
 *   One record per `.item` element, in page order. A field is `null` when
 *   the item has no matching `.title` / `.price` child.
 */
async function scrapePaginatedData(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const allData = [];
    let hasNextPage = true;
    let pageNum = 1;

    while (hasNextPage) {
      // Build the address with URL/searchParams so a base URL that already
      // has a query string does not end up with a second "?".
      const pageUrl = new URL(url);
      pageUrl.searchParams.set('page', String(pageNum));
      await page.goto(pageUrl.href);
      await page.waitForSelector('.item');

      // Scrape current page data
      const pageData = await page.$$eval('.item', (items) => {
        // Declared inside the callback because it runs in the browser
        // context and cannot see functions from the Node side.
        const textOf = (root, selector) => {
          const node = root.querySelector(selector);
          return node === null ? null : node.textContent;
        };
        return items.map((item) => ({
          title: textOf(item, '.title'),
          price: textOf(item, '.price'),
        }));
      });

      allData.push(...pageData);
      console.log(`Scraped page ${pageNum}: ${pageData.length} items`);

      // Check if there's a next page
      const nextControl = await page.$('.next-page:not(.disabled)');
      hasNextPage = nextControl !== null;
      pageNum++;
    }

    return allData;
  } finally {
    // Close the browser even when a page fails to load or parse
    await browser.close();
  }
}

// Example invocation; the rejection handler keeps the promise from floating.
scrapePaginatedData('https://example.com/products')
  .catch((error) => console.error('Scraping failed:', error));
Use Case 4: Dynamic Content Loading
/**
 * Scrapes an infinite-scroll page: keeps scrolling to the bottom until a
 * scroll no longer produces additional `.content-item` elements, then
 * returns the text of every item.
 *
 * @param {string} url - Address of the infinite-scroll page.
 * @returns {Promise<string[]>} Text content of each `.content-item`, in
 *   document order.
 */
async function scrapeDynamicContent(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Wait for initial content to load
    await page.waitForSelector('.content');

    // Scroll to load more content
    while (true) {
      const previousCount = await page.$$eval('.content-item', (items) => items.length);

      // Scroll to bottom
      await page.evaluate(() => {
        window.scrollTo(0, document.body.scrollHeight);
      });

      // Wait until the item count actually grows. Waiting for a fixed
      // selector is not enough: once such an element exists the wait
      // resolves immediately on every pass and the loop never terminates.
      try {
        await page.waitForFunction(
          (count) => document.querySelectorAll('.content-item').length > count,
          { timeout: 3000 },
          previousCount,
        );
      } catch (error) {
        // Only a timeout means "no more content"; anything else is a real failure.
        if (error.name !== 'TimeoutError') {
          throw error;
        }
        break;
      }
    }

    // Get all content
    return await page.$$eval('.content-item', (items) => items.map((item) => item.textContent));
  } finally {
    // Close the browser on success and on failure alike
    await browser.close();
  }
}

// Example invocation; the rejection handler keeps the promise from floating.
scrapeDynamicContent('https://example.com/infinite-scroll')
  .catch((error) => console.error('Scraping failed:', error));
12. Best Practices
1. Use Waiting Mechanisms:
javascript// Good practice await page.waitForSelector('.button', { visible: true }); await page.click('.button'); // Bad practice await page.click('.button'); // May fail
2. Handle Dynamic Content:
javascript// Wait for network idle await page.goto(url, { waitUntil: 'networkidle2' }); // Wait for specific element await page.waitForSelector('.loaded-content');
3. Error Handling:
javascripttry { await page.click('#button'); } catch (error) { console.error('Click failed:', error); // Retry logic }
4. Performance Optimization:
javascript// Disable unnecessary resources await page.setRequestInterception(true); page.on('request', (request) => { if (['image', 'font', 'media'].includes(request.resourceType())) { request.abort(); } else { request.continue(); } });
5. Resource Cleanup:
javascripttry { // Operation code } finally { await browser.close(); }