Puppeteer provides powerful page screenshot and PDF generation capabilities, which can be used for automated testing, document generation, web archiving, and many other scenarios.
1. Page Screenshots
Basic Screenshot:
const puppeteer = require('puppeteer');

// Launch a browser, capture https://example.com to example.png, and close the
// browser afterwards. The try/finally guarantees the browser process is
// released even when navigation or the capture fails.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');

    // Basic screenshot
    await page.screenshot({ path: 'example.png' });
  } finally {
    await browser.close();
  }
})().catch((error) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(error);
});
Screenshot Options:
// Reference of the options accepted by page.screenshot(). Options that cannot
// be combined with the active ones are kept as comments so the call stays valid.
await page.screenshot({
  path: 'screenshot.png',        // Save path
  type: 'png',                   // Format: 'png', 'jpeg' or 'webp'
  // quality: 90,                // Quality (0-100); JPEG/WebP only, rejected when type is 'png'
  fullPage: true,                // Capture entire page (including scrolled content)
  // clip: {                     // Clip region; cannot be combined with fullPage
  //   x: 0,
  //   y: 0,
  //   width: 800,
  //   height: 600
  // },
  omitBackground: false,         // Omit white background (transparent PNG)
  encoding: 'base64',            // Encoding: 'base64' or 'binary'
  captureBeyondViewport: false   // Capture content outside viewport
});
Screenshot Specific Element:
// Capture only the element matching '#header'.
const element = await page.$('#header');
if (element === null) {
  // page.$ resolves to null when nothing matches; fail with a clear message
  // rather than a TypeError on the next line.
  throw new Error("Element '#header' not found");
}
await element.screenshot({ path: 'header.png' });
Screenshot Viewport:
// Set the viewport to 1920x1080, then take a screenshot.
const viewportSize = { width: 1920, height: 1080 };
await page.setViewport(viewportSize);
await page.screenshot({ path: 'viewport.png' });
Full Page Screenshot:
// Capture the entire page, including scrolled content.
const fullPageOptions = { path: 'fullpage.png', fullPage: true };
await page.screenshot(fullPageOptions);
High Quality JPEG:
// Save a JPEG at quality 95 (the scale is 0-100).
const jpegOptions = {
  path: 'high-quality.jpg',
  type: 'jpeg',
  quality: 95
};
await page.screenshot(jpegOptions);
Transparent Background:
// Omit the white background so the PNG is transparent.
const transparentOptions = { path: 'transparent.png', omitBackground: true };
await page.screenshot(transparentOptions);
Get Screenshot as Base64:
// Request the screenshot as a base64 string and print it.
const screenshotBase64 = await page.screenshot({ encoding: 'base64' });
console.log(screenshotBase64);
2. PDF Generation
Basic PDF:
// Render the current page to page.pdf; only the save path is specified.
const basicPdfOptions = { path: 'page.pdf' };
await page.pdf(basicPdfOptions);
PDF Options:
// Reference of the options accepted by page.pdf().
const pdfOptions = {
  path: 'output.pdf',            // Save path
  scale: 1,                      // Scale factor
  displayHeaderFooter: false,    // Display header/footer
  headerTemplate: '',            // Header HTML template
  footerTemplate: '',            // Footer HTML template
  printBackground: false,        // Print background graphics
  landscape: false,              // Landscape orientation
  pageRanges: '',                // Page ranges, e.g., '1-5, 8, 11-13'
  format: 'A4',                  // Paper format
  width: '',                     // Paper width, e.g., '10in'
  height: '',                    // Paper height, e.g., '20in'
  margin: {                      // Margins
    top: '1cm',
    right: '1cm',
    bottom: '1cm',
    left: '1cm'
  },
  preferCSSPageSize: false       // Use CSS page size
};

await page.pdf(pdfOptions);
Supported Paper Formats:
- Letter: 8.5in x 11in
- Legal: 8.5in x 14in
- Tabloid: 11in x 17in
- Ledger: 17in x 11in
- A0: 33.1in x 46.8in
- A1: 23.4in x 33.1in
- A2: 16.5in x 23.4in
- A3: 11.7in x 16.5in
- A4: 8.27in x 11.7in
- A5: 5.83in x 8.27in
- A6: 4.13in x 5.83in
Landscape PDF:
// A4 paper in landscape orientation.
const landscapeOptions = {
  path: 'landscape.pdf',
  landscape: true,
  format: 'A4'
};
await page.pdf(landscapeOptions);
Custom Paper Size:
// Explicit paper width and height instead of a named format.
const customSizeOptions = {
  path: 'custom.pdf',
  width: '200mm',
  height: '300mm'
};
await page.pdf(customSizeOptions);
Set Margins:
// Apply the same 20px margin to all four sides.
const edge = '20px';
const pageMargin = { top: edge, right: edge, bottom: edge, left: edge };
await page.pdf({ path: 'margin.pdf', margin: pageMargin });
Print Background Graphics:
// Include background graphics in the PDF.
const backgroundOptions = { path: 'background.pdf', printBackground: true };
await page.pdf(backgroundOptions);
Add Header/Footer:
// Print a centred header and a "Page X of Y" footer on every page.
await page.pdf({
  path: 'header-footer.pdf',
  displayHeaderFooter: true,
  headerTemplate: `
    <div style="font-size: 10px; text-align: center; width: 100%;">
      Generated by Puppeteer
    </div>
  `,
  // Elements with the classes pageNumber and totalPages are filled in per page.
  footerTemplate: `
    <div style="font-size: 10px; text-align: center; width: 100%;">
      Page <span class="pageNumber"></span> of <span class="totalPages"></span>
    </div>
  `,
  // The header and footer are drawn in the page margins, so reserve room
  // for them; without top/bottom margins they have no space to render.
  margin: { top: '1cm', bottom: '1cm' }
});
Print Specific Pages:
// Print only the listed pages and page ranges.
const selectedPages = '1-3, 5, 8-10';
await page.pdf({ path: 'pages.pdf', pageRanges: selectedPages });
3. Practical Use Cases
Use Case 1: Web Archiving
/**
 * Archive a web page as an A4 PDF with background graphics and 1cm margins.
 *
 * @param {string} url - Address of the page to archive.
 * @param {string} outputPath - File path the PDF is written to.
 * @returns {Promise<void>} Resolves once the PDF is written and the browser is closed.
 */
async function archiveWebpage(url, outputPath) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });

    // Generate PDF archive
    await page.pdf({
      path: outputPath,
      format: 'A4',
      printBackground: true,
      margin: {
        top: '1cm',
        right: '1cm',
        bottom: '1cm',
        left: '1cm'
      }
    });
  } finally {
    // Close the browser even when navigation or rendering fails, so the
    // browser process is never left running.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled rejection.
archiveWebpage('https://example.com', 'archive.pdf').catch(console.error);
Use Case 2: Batch Screenshot Service
/**
 * Derive a file-system-safe base name from a URL.
 *
 * The protocol is dropped and every character outside letters, digits,
 * '.', '_' and '-' becomes '_', so query strings, ports and slashes cannot
 * produce an invalid or nested path.
 *
 * @param {string} url - URL to convert.
 * @returns {string} Base name without extension.
 */
function urlToFilename(url) {
  return url
    .replace(/^https?:\/\//, '')
    .replace(/[^a-zA-Z0-9._-]/g, '_');
}

/**
 * Take a full-page PNG screenshot of every URL, reusing one browser tab.
 *
 * @param {string[]} urls - Pages to capture, visited in order.
 * @param {string} [outputDir='screenshots'] - Directory the PNG files are written to.
 * @returns {Promise<void>} Resolves once all screenshots are saved and the browser is closed.
 */
async function batchScreenshots(urls, outputDir = 'screenshots') {
  const browser = await puppeteer.launch();
  try {
    // Make sure the output directory exists before writing into it.
    const { mkdir } = await import('node:fs/promises');
    await mkdir(outputDir, { recursive: true });

    const page = await browser.newPage();
    for (const url of urls) {
      await page.goto(url, { waitUntil: 'networkidle2' });

      const filename = `${urlToFilename(url)}.png`;
      await page.screenshot({
        path: `${outputDir}/${filename}`,
        fullPage: true
      });
      console.log(`Screenshot saved: ${filename}`);
    }
  } finally {
    // Close the browser even when one of the captures fails.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled rejection.
batchScreenshots([
  'https://example.com',
  'https://example.com/about',
  'https://example.com/contact'
]).catch(console.error);
Use Case 3: Generate Invoice PDF
/**
 * Escape a value for interpolation into HTML text content.
 *
 * @param {*} value - Value to render; converted with String().
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Build the invoice HTML document from the invoice data.
 *
 * Every interpolated field is escaped so that customer or item text
 * containing markup is printed literally instead of being parsed as HTML.
 *
 * @param {object} invoiceData - Same shape as accepted by generateInvoice.
 * @returns {string} Complete HTML document.
 */
function buildInvoiceHtml(invoiceData) {
  const rows = invoiceData.items.map((item) => `
          <tr>
            <td>${escapeHtml(item.name)}</td>
            <td>${escapeHtml(item.quantity)}</td>
            <td>$${escapeHtml(item.price)}</td>
            <td>$${escapeHtml(item.quantity * item.price)}</td>
          </tr>
        `).join('');

  return `
    <html>
      <head>
        <style>
          body { font-family: Arial, sans-serif; padding: 40px; }
          .header { text-align: center; margin-bottom: 40px; }
          .invoice-info { margin-bottom: 30px; }
          table { width: 100%; border-collapse: collapse; }
          th, td { border: 1px solid #ddd; padding: 10px; text-align: left; }
          th { background-color: #f2f2f2; }
          .total { text-align: right; font-weight: bold; margin-top: 20px; }
        </style>
      </head>
      <body>
        <div class="header">
          <h1>INVOICE</h1>
          <p>Invoice #: ${escapeHtml(invoiceData.number)}</p>
        </div>
        <div class="invoice-info">
          <p>Date: ${escapeHtml(invoiceData.date)}</p>
          <p>Customer: ${escapeHtml(invoiceData.customer)}</p>
        </div>
        <table>
          <thead>
            <tr>
              <th>Item</th>
              <th>Quantity</th>
              <th>Price</th>
              <th>Total</th>
            </tr>
          </thead>
          <tbody>
            ${rows}
          </tbody>
        </table>
        <div class="total">
          Total: $${escapeHtml(invoiceData.total)}
        </div>
      </body>
    </html>
  `;
}

/**
 * Render an invoice to `invoice_<number>.pdf` in the current directory.
 *
 * @param {object} invoiceData
 * @param {string} invoiceData.number - Invoice number; also used in the file name.
 * @param {string} invoiceData.date - Invoice date, printed as given.
 * @param {string} invoiceData.customer - Customer name.
 * @param {{name: string, quantity: number, price: number}[]} invoiceData.items - Line items.
 * @param {number} invoiceData.total - Grand total, printed as given.
 * @returns {Promise<void>} Resolves once the PDF is written and the browser is closed.
 */
async function generateInvoice(invoiceData) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Load invoice template
    await page.setContent(buildInvoiceHtml(invoiceData));

    // Generate PDF
    await page.pdf({
      path: `invoice_${invoiceData.number}.pdf`,
      format: 'A4',
      printBackground: true,
      margin: {
        top: '20px',
        right: '20px',
        bottom: '20px',
        left: '20px'
      }
    });
  } finally {
    // Close the browser even when rendering fails.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled rejection.
generateInvoice({
  number: 'INV-001',
  date: '2024-01-15',
  customer: 'John Doe',
  items: [
    { name: 'Product A', quantity: 2, price: 50 },
    { name: 'Product B', quantity: 1, price: 75 }
  ],
  total: 175
}).catch(console.error);
Use Case 4: Responsive Design Test Screenshots
/**
 * Capture a full-page screenshot of one URL at mobile, tablet and desktop
 * viewport sizes, saved as mobile.png, tablet.png and desktop.png.
 *
 * @param {string} url - Page to capture.
 * @returns {Promise<void>} Resolves once all screenshots are saved and the browser is closed.
 */
async function responsiveScreenshots(url) {
  const viewports = [
    { name: 'mobile', width: 375, height: 667 },
    { name: 'tablet', width: 768, height: 1024 },
    { name: 'desktop', width: 1920, height: 1080 }
  ];

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    for (const viewport of viewports) {
      // Resize first, then load the page at the new viewport size.
      await page.setViewport(viewport);
      await page.goto(url, { waitUntil: 'networkidle2' });

      await page.screenshot({
        path: `${viewport.name}.png`,
        fullPage: true
      });
      console.log(`Screenshot saved: ${viewport.name}.png`);
    }
  } finally {
    // Close the browser even when one of the captures fails.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled rejection.
responsiveScreenshots('https://example.com').catch(console.error);
4. Performance Optimization Tips
1. Parallel Processing:
const urls = ['url1', 'url2', 'url3'];
const browser = await puppeteer.launch();

try {
  // One tab per URL, all captured concurrently in the same browser.
  await Promise.all(urls.map(async (url, index) => {
    const page = await browser.newPage();
    try {
      await page.goto(url);
      await page.screenshot({ path: `screenshot-${index}.png` });
    } finally {
      // Release the tab even when this URL fails.
      await page.close();
    }
  }));
} finally {
  // Promise.all rejects as soon as one capture fails; still close the browser.
  await browser.close();
}
2. Reuse Browser Instance:
const browser = await puppeteer.launch();

try {
  // Reuse same browser instance multiple times
  for (const [index, url] of urls.entries()) {
    const page = await browser.newPage();
    try {
      await page.goto(url);
      // Name files by position: a raw URL contains '/' and ':' and is not a
      // usable file name.
      await page.screenshot({ path: `screenshot-${index}.png` });
    } finally {
      // Release the tab even when this URL fails.
      await page.close();
    }
  }
} finally {
  await browser.close();
}
3. Disable Unnecessary Resources:
// Resource types whose requests are aborted.
const blockedResourceTypes = ['image', 'font', 'media'];

await page.setRequestInterception(true);

// Abort requests for blocked resource types and let everything else through.
page.on('request', (request) => {
  const isBlocked = blockedResourceTypes.includes(request.resourceType());
  if (isBlocked) {
    request.abort();
    return;
  }
  request.continue();
});
5. Important Notes
- PDF Generation Limitation: PDF generation is only available in headless mode
- Font Support: Ensure required fonts are installed on the system
- Page Loading: Use waitUntil: 'networkidle2' to ensure the page is fully loaded
- Memory Management: Monitor memory usage when processing many pages
- Error Handling: Add appropriate error handling logic
- Timeout Settings: Adjust timeout based on page complexity